1 // SPDX-License-Identifier: MIT
2 //
3 // Copyright 2024 Advanced Micro Devices, Inc.
4 
5 #include "spl_fixpt31_32.h"
6 
7 static const struct spl_fixed31_32 spl_fixpt_two_pi = { 26986075409LL };
8 static const struct spl_fixed31_32 spl_fixpt_ln2 = { 2977044471LL };
9 static const struct spl_fixed31_32 spl_fixpt_ln2_div_2 = { 1488522236LL };
10 
abs_i64(long long arg)11 static inline unsigned long long abs_i64(
12 	long long arg)
13 {
14 	if (arg > 0)
15 		return (unsigned long long)arg;
16 	else
17 		return (unsigned long long)(-arg);
18 }
19 
20 /*
21  * @brief
22  * result = dividend / divisor
23  * *remainder = dividend % divisor
24  */
complete_integer_division_u64(unsigned long long dividend,unsigned long long divisor,unsigned long long * remainder)25 static inline unsigned long long complete_integer_division_u64(
26 	unsigned long long dividend,
27 	unsigned long long divisor,
28 	unsigned long long *remainder)
29 {
30 	unsigned long long result;
31 
32 	ASSERT(divisor);
33 
34 	result = spl_div64_u64_rem(dividend, divisor, remainder);
35 
36 	return result;
37 }
38 
39 
40 #define FRACTIONAL_PART_MASK \
41 	((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1)
42 
43 #define GET_INTEGER_PART(x) \
44 	((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART)
45 
46 #define GET_FRACTIONAL_PART(x) \
47 	(FRACTIONAL_PART_MASK & (x))
48 
spl_fixpt_from_fraction(long long numerator,long long denominator)49 struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator)
50 {
51 	struct spl_fixed31_32 res;
52 
53 	bool arg1_negative = numerator < 0;
54 	bool arg2_negative = denominator < 0;
55 
56 	unsigned long long arg1_value = arg1_negative ? -numerator : numerator;
57 	unsigned long long arg2_value = arg2_negative ? -denominator : denominator;
58 
59 	unsigned long long remainder;
60 
61 	/* determine integer part */
62 
63 	unsigned long long res_value = complete_integer_division_u64(
64 		arg1_value, arg2_value, &remainder);
65 
66 	ASSERT(res_value <= LONG_MAX);
67 
68 	/* determine fractional part */
69 	{
70 		unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;
71 
72 		do {
73 			remainder <<= 1;
74 
75 			res_value <<= 1;
76 
77 			if (remainder >= arg2_value) {
78 				res_value |= 1;
79 				remainder -= arg2_value;
80 			}
81 		} while (--i != 0);
82 	}
83 
84 	/* round up LSB */
85 	{
86 		unsigned long long summand = (remainder << 1) >= arg2_value;
87 
88 		ASSERT(res_value <= LLONG_MAX - summand);
89 
90 		res_value += summand;
91 	}
92 
93 	res.value = (long long)res_value;
94 
95 	if (arg1_negative ^ arg2_negative)
96 		res.value = -res.value;
97 
98 	return res;
99 }
100 
spl_fixpt_mul(struct spl_fixed31_32 arg1,struct spl_fixed31_32 arg2)101 struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
102 {
103 	struct spl_fixed31_32 res;
104 
105 	bool arg1_negative = arg1.value < 0;
106 	bool arg2_negative = arg2.value < 0;
107 
108 	unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value;
109 	unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value;
110 
111 	unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
112 	unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);
113 
114 	unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
115 	unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);
116 
117 	unsigned long long tmp;
118 
119 	res.value = arg1_int * arg2_int;
120 
121 	ASSERT(res.value <= (long long)LONG_MAX);
122 
123 	res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
124 
125 	tmp = arg1_int * arg2_fra;
126 
127 	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
128 
129 	res.value += tmp;
130 
131 	tmp = arg2_int * arg1_fra;
132 
133 	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
134 
135 	res.value += tmp;
136 
137 	tmp = arg1_fra * arg2_fra;
138 
139 	tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
140 		(tmp >= (unsigned long long)spl_fixpt_half.value);
141 
142 	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
143 
144 	res.value += tmp;
145 
146 	if (arg1_negative ^ arg2_negative)
147 		res.value = -res.value;
148 
149 	return res;
150 }
151 
spl_fixpt_sqr(struct spl_fixed31_32 arg)152 struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg)
153 {
154 	struct spl_fixed31_32 res;
155 
156 	unsigned long long arg_value = abs_i64(arg.value);
157 
158 	unsigned long long arg_int = GET_INTEGER_PART(arg_value);
159 
160 	unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);
161 
162 	unsigned long long tmp;
163 
164 	res.value = arg_int * arg_int;
165 
166 	ASSERT(res.value <= (long long)LONG_MAX);
167 
168 	res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
169 
170 	tmp = arg_int * arg_fra;
171 
172 	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
173 
174 	res.value += tmp;
175 
176 	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
177 
178 	res.value += tmp;
179 
180 	tmp = arg_fra * arg_fra;
181 
182 	tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
183 		(tmp >= (unsigned long long)spl_fixpt_half.value);
184 
185 	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
186 
187 	res.value += tmp;
188 
189 	return res;
190 }
191 
spl_fixpt_recip(struct spl_fixed31_32 arg)192 struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg)
193 {
194 	/*
195 	 * @note
196 	 * Good idea to use Newton's method
197 	 */
198 
199 	ASSERT(arg.value);
200 
201 	return spl_fixpt_from_fraction(
202 		spl_fixpt_one.value,
203 		arg.value);
204 }
205 
spl_fixpt_sinc(struct spl_fixed31_32 arg)206 struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg)
207 {
208 	struct spl_fixed31_32 square;
209 
210 	struct spl_fixed31_32 res = spl_fixpt_one;
211 
212 	int n = 27;
213 
214 	struct spl_fixed31_32 arg_norm = arg;
215 
216 	if (spl_fixpt_le(
217 		spl_fixpt_two_pi,
218 		spl_fixpt_abs(arg))) {
219 		arg_norm = spl_fixpt_sub(
220 			arg_norm,
221 			spl_fixpt_mul_int(
222 				spl_fixpt_two_pi,
223 				(int)spl_div64_s64(
224 					arg_norm.value,
225 					spl_fixpt_two_pi.value)));
226 	}
227 
228 	square = spl_fixpt_sqr(arg_norm);
229 
230 	do {
231 		res = spl_fixpt_sub(
232 			spl_fixpt_one,
233 			spl_fixpt_div_int(
234 				spl_fixpt_mul(
235 					square,
236 					res),
237 				n * (n - 1)));
238 
239 		n -= 2;
240 	} while (n > 2);
241 
242 	if (arg.value != arg_norm.value)
243 		res = spl_fixpt_div(
244 			spl_fixpt_mul(res, arg_norm),
245 			arg);
246 
247 	return res;
248 }
249 
spl_fixpt_sin(struct spl_fixed31_32 arg)250 struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg)
251 {
252 	return spl_fixpt_mul(
253 		arg,
254 		spl_fixpt_sinc(arg));
255 }
256 
spl_fixpt_cos(struct spl_fixed31_32 arg)257 struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg)
258 {
259 	/* TODO implement argument normalization */
260 
261 	const struct spl_fixed31_32 square = spl_fixpt_sqr(arg);
262 
263 	struct spl_fixed31_32 res = spl_fixpt_one;
264 
265 	int n = 26;
266 
267 	do {
268 		res = spl_fixpt_sub(
269 			spl_fixpt_one,
270 			spl_fixpt_div_int(
271 				spl_fixpt_mul(
272 					square,
273 					res),
274 				n * (n - 1)));
275 
276 		n -= 2;
277 	} while (n != 0);
278 
279 	return res;
280 }
281 
282 /*
283  * @brief
284  * result = exp(arg),
285  * where abs(arg) < 1
286  *
287  * Calculated as Taylor series.
288  */
fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg)289 static struct spl_fixed31_32 fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg)
290 {
291 	unsigned int n = 9;
292 
293 	struct spl_fixed31_32 res = spl_fixpt_from_fraction(
294 		n + 2,
295 		n + 1);
296 	/* TODO find correct res */
297 
298 	ASSERT(spl_fixpt_lt(arg, spl_fixpt_one));
299 
300 	do
301 		res = spl_fixpt_add(
302 			spl_fixpt_one,
303 			spl_fixpt_div_int(
304 				spl_fixpt_mul(
305 					arg,
306 					res),
307 				n));
308 	while (--n != 1);
309 
310 	return spl_fixpt_add(
311 		spl_fixpt_one,
312 		spl_fixpt_mul(
313 			arg,
314 			res));
315 }
316 
spl_fixpt_exp(struct spl_fixed31_32 arg)317 struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg)
318 {
319 	/*
320 	 * @brief
321 	 * Main equation is:
322 	 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
323 	 * where m = round(x / ln(2)), r = x - m * ln(2)
324 	 */
325 
326 	if (spl_fixpt_le(
327 		spl_fixpt_ln2_div_2,
328 		spl_fixpt_abs(arg))) {
329 		int m = spl_fixpt_round(
330 			spl_fixpt_div(
331 				arg,
332 				spl_fixpt_ln2));
333 
334 		struct spl_fixed31_32 r = spl_fixpt_sub(
335 			arg,
336 			spl_fixpt_mul_int(
337 				spl_fixpt_ln2,
338 				m));
339 
340 		ASSERT(m != 0);
341 
342 		ASSERT(spl_fixpt_lt(
343 			spl_fixpt_abs(r),
344 			spl_fixpt_one));
345 
346 		if (m > 0)
347 			return spl_fixpt_shl(
348 				fixed31_32_exp_from_taylor_series(r),
349 				(unsigned char)m);
350 		else
351 			return spl_fixpt_div_int(
352 				fixed31_32_exp_from_taylor_series(r),
353 				1LL << -m);
354 	} else if (arg.value != 0)
355 		return fixed31_32_exp_from_taylor_series(arg);
356 	else
357 		return spl_fixpt_one;
358 }
359 
spl_fixpt_log(struct spl_fixed31_32 arg)360 struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg)
361 {
362 	struct spl_fixed31_32 res = spl_fixpt_neg(spl_fixpt_one);
363 	/* TODO improve 1st estimation */
364 
365 	struct spl_fixed31_32 error;
366 
367 	ASSERT(arg.value > 0);
368 	/* TODO if arg is negative, return NaN */
369 	/* TODO if arg is zero, return -INF */
370 
371 	do {
372 		struct spl_fixed31_32 res1 = spl_fixpt_add(
373 			spl_fixpt_sub(
374 				res,
375 				spl_fixpt_one),
376 			spl_fixpt_div(
377 				arg,
378 				spl_fixpt_exp(res)));
379 
380 		error = spl_fixpt_sub(
381 			res,
382 			res1);
383 
384 		res = res1;
385 		/* TODO determine max_allowed_error based on quality of exp() */
386 	} while (abs_i64(error.value) > 100ULL);
387 
388 	return res;
389 }
390 
391 
392 /* this function is a generic helper to translate fixed point value to
393  * specified integer format that will consist of integer_bits integer part and
394  * fractional_bits fractional part. For example it is used in
395  * spl_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional
396  * part in 32 bits. It is used in hw programming (scaler)
397  */
398 
ux_dy(long long value,unsigned int integer_bits,unsigned int fractional_bits)399 static inline unsigned int ux_dy(
400 	long long value,
401 	unsigned int integer_bits,
402 	unsigned int fractional_bits)
403 {
404 	/* 1. create mask of integer part */
405 	unsigned int result = (1 << integer_bits) - 1;
406 	/* 2. mask out fractional part */
407 	unsigned int fractional_part = FRACTIONAL_PART_MASK & value;
408 	/* 3. shrink fixed point integer part to be of integer_bits width*/
409 	result &= GET_INTEGER_PART(value);
410 	/* 4. make space for fractional part to be filled in after integer */
411 	result <<= fractional_bits;
412 	/* 5. shrink fixed point fractional part to of fractional_bits width*/
413 	fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
414 	/* 6. merge the result */
415 	return result | fractional_part;
416 }
417 
clamp_ux_dy(long long value,unsigned int integer_bits,unsigned int fractional_bits,unsigned int min_clamp)418 static inline unsigned int clamp_ux_dy(
419 	long long value,
420 	unsigned int integer_bits,
421 	unsigned int fractional_bits,
422 	unsigned int min_clamp)
423 {
424 	unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits);
425 
426 	if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
427 		return (1 << (integer_bits + fractional_bits)) - 1;
428 	else if (truncated_val > min_clamp)
429 		return truncated_val;
430 	else
431 		return min_clamp;
432 }
433 
spl_fixpt_u4d19(struct spl_fixed31_32 arg)434 unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg)
435 {
436 	return ux_dy(arg.value, 4, 19);
437 }
438 
spl_fixpt_u3d19(struct spl_fixed31_32 arg)439 unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg)
440 {
441 	return ux_dy(arg.value, 3, 19);
442 }
443 
spl_fixpt_u2d19(struct spl_fixed31_32 arg)444 unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg)
445 {
446 	return ux_dy(arg.value, 2, 19);
447 }
448 
spl_fixpt_u0d19(struct spl_fixed31_32 arg)449 unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg)
450 {
451 	return ux_dy(arg.value, 0, 19);
452 }
453 
spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg)454 unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg)
455 {
456 	return clamp_ux_dy(arg.value, 0, 14, 1);
457 }
458 
spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg)459 unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg)
460 {
461 	return clamp_ux_dy(arg.value, 0, 10, 1);
462 }
463 
spl_fixpt_s4d19(struct spl_fixed31_32 arg)464 int spl_fixpt_s4d19(struct spl_fixed31_32 arg)
465 {
466 	if (arg.value < 0)
467 		return -(int)ux_dy(spl_fixpt_abs(arg).value, 4, 19);
468 	else
469 		return ux_dy(arg.value, 4, 19);
470 }
471 
spl_fixpt_from_ux_dy(unsigned int value,unsigned int integer_bits,unsigned int fractional_bits)472 struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value,
473 	unsigned int integer_bits,
474 	unsigned int fractional_bits)
475 {
476 	struct spl_fixed31_32 fixpt_value = spl_fixpt_zero;
477 	struct spl_fixed31_32 fixpt_int_value = spl_fixpt_zero;
478 	long long frac_mask = ((long long)1 << (long long)integer_bits) - 1;
479 
480 	fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
481 	frac_mask = frac_mask << fractional_bits;
482 	fixpt_int_value.value = value & frac_mask;
483 	fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
484 	fixpt_value.value |= fixpt_int_value.value;
485 	return fixpt_value;
486 }
487 
spl_fixpt_from_int_dy(unsigned int int_value,unsigned int frac_value,unsigned int integer_bits,unsigned int fractional_bits)488 struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value,
489 	unsigned int frac_value,
490 	unsigned int integer_bits,
491 	unsigned int fractional_bits)
492 {
493 	struct spl_fixed31_32 fixpt_value = spl_fixpt_from_int(int_value);
494 
495 	fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
496 	return fixpt_value;
497 }
498