m68k/fpsp040/setox.S

6 |	number.	setoxm1 computes exp(X)-1, and setoxm1d computes
7 |	exp(X)-1 for denormalized X.
16 |	exp(X) or exp(X)-1 returned in floating-point register fp0.
34 |	argument X whose magnitude is less than 16380 log2, which
40 |	argument X, 0.25 <= |X| < 70log2. For |X| < 0.25, it takes
52 |	Step 2.	Return	ans := ans + sign(X)*2^(-126). Exit.
60 |		1.1	If |X| >= 2^(-65), go to Step 1.3.
62 |		1.3	If |X| < 16380 log(2), go to Step 2.
64 |	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.
66 |		 compact representation of |X| is used. This format is a
68 |		 the sign and biased exponent field of |X|; the lower 16
70 |		 explicit bit) bits of |X|. Consequently, the comparisons
72 |		 Note also that the constant 16380 log(2) used in Step 1.3
74 |		 to Step 2 guarantees |X| < 16380 log(2). There is no harm
75 |		 to have a small number of cases where |X| is less than,
76 |		 but close to, 16380 log(2) and the branch to Step 9 is
79 |	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).
80 |		2.1	Set AdjFlag := 0 (indicates the branch 1.3 -> 2 was taken)
81 |		2.2	N := round-to-nearest-integer( X * 64/log2 ).
82 |		2.3	Calculate	J = N mod 64; so J = 0,1,2,..., or 63.
84 |		2.5	Calculate the address of the stored value of 2^(J/64).
85 |		2.6	Create the value Scale = 2^M.
88 |			Z := X * constant
93 |			constant := single-precision( 64/log 2 ).
99 |			Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).
103 |	Step 3.	Calculate X - N*log2/64.
104 |		3.1	R := X + N*L1, where L1 := single-precision(-log2/64).
110 |		 c) The calculation X+N*L1 is also exact due to cancellation.
111 |		 Thus, R is practically X+N(L1+L2) to full 64 bits.
115 |			N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24)
116 |			X*64/log2 (1+eps)	=	N + f,	|f| <= 0.5
117 |			X*64/log2 - N	=	f - eps*X 64/log2
118 |			X - N*log2/64	=	f*log2/64 - eps*X
121 |		 Now |X| <= 16446 log2, thus
123 |			|X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64
130 |		 made as "short" as possible: A1 (which is 1/2), A4 and A5
133 |			|p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062.
141 |	Step 5.	Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by
143 |		 where T and t are the stored values for 2^(J/64).
144 |	Notes:	2^(J/64) is stored as T and t where T+t approximates
145 |		 2^(J/64) to roughly 85 bits; T is in extended precision
148 |		 reason for such a special form is that T-1, T-2, and T-8
152 |	Step 6.	Reconstruction of exp(X)
153 |			exp(X) = 2^M * 2^(J/64) * exp(R).
158 |	Notes:	If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,
159 |		 |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will
162 |			X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.
163 |		 Hence, exp(X) may overflow or underflow or neither.
164 |		 When that is the case, AdjScale = 2^(M1) where M1 is
172 |	Step 7.	Return 1 + X.
173 |		7.1	ans := X
176 |	Notes:	For non-zero X, the inexact exception will always be
178 |		 Note also that we use the FMOVEM instruction to move X
180 |		 the FMOVEM may not seem relevant since X is normalized,
185 |	Step 8.	Handle exp(X) where |X| >= 16380log2.
186 |		8.1	If |X| > 16480 log2, go to Step 9.
188 |		8.2	N := round-to-integer( X * 64/log2 )
190 |		8.4	K := (N-J)/64, M1 := truncate(K/2), M = K-M1, AdjFlag := 1.
191 |		8.5	Calculate the address of the stored value 2^(J/64).
192 |		8.6	Create the values Scale = 2^M, AdjScale = 2^M1.
196 |	Step 9.	Handle exp(X), |X| > 16480 log2.
197 |		9.1	If X < 0, go to 9.3
202 |	Notes:	Exp(X) will surely overflow or underflow, depending on
203 |		 X's sign. "Huge" and "Tiny" are respectively large/tiny
214 |	Step 2.	Return	ans := X + ans. Exit.
215 |	Notes:	This will return X with the appropriate rounding
221 |	Step 1.	Check |X|
222 |		1.1	If |X| >= 1/4, go to Step 1.3.
224 |		1.3	If |X| < 70 log(2), go to Step 2.
226 |	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.
227 |		 However, it is conceivable |X| can be small very often
228 |		 because EXPM1 is intended to evaluate exp(X)-1 accurately
229 |		 when |X| is small. For further details on the comparisons,
232 |	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).
233 |		2.1	N := round-to-nearest-integer( X * 64/log2 ).
234 |		2.2	Calculate	J = N mod 64; so J = 0,1,2,..., or 63.
236 |		2.4	Calculate the address of the stored value of 2^(J/64).
237 |		2.5	Create the values Sc = 2^M and OnebySc := -2^(-M).
238 |	Notes:	See the notes on Step 2 of setox.
240 |	Step 3.	Calculate X - N*log2/64.
241 |		3.1	R := X + N*L1, where L1 := single-precision(-log2/64).
244 |		 shows that |R| <= 0.0055 (note that |X| <= 70 log2 in
250 |		 made as "short" as possible: A1 (which is 1/2), A5 and A6
253 |			|p - (exp(R)-1)| <	|R| * 2^(-72.7)
261 |	Step 5.	Compute 2^(J/64)*p by
263 |		 where T and t are the stored values for 2^(J/64).
264 |	Notes:	2^(J/64) is stored as T and t where T+t approximates
265 |		 2^(J/64) to roughly 85 bits; T is in extended precision
268 |		 reason for such a special form is that T-1, T-2, and T-8
271 |		 bigger than 2^(-67.7) compared to the final result.
273 |	Step 6.	Reconstruction of exp(X)-1
274 |			exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ).
285 |	Step 7.	exp(X)-1 for |X| < 1/4.
286 |		7.1	If |X| >= 2^(-65), go to Step 9.
289 |	Step 8.	Calculate exp(X)-1, |X| < 2^(-65).
290 |		8.1	If |X| < 2^(-16312), goto 8.3
291 |		8.2	Restore FPCR; return ans := X - 2^(-16382). Exit.
292 |		8.3	X := X * 2^(140).
293 |		8.4	Restore FPCR; ans := ans - 2^(-16382).
294 |		 Return ans := ans*2^(140). Exit
295 |	Notes:	The idea is to return "X - tiny" under the user
298 |		 best we can. For |X| >= 2^(-16312), the straightforward
301 |	Step 9.	Calculate exp(X)-1, |X| < 1/4, by a polynomial
302 |			p = X + X*X*(B1 + X*(B2 + ... + X*B12))
304 |		 made as "short" as possible: B1 (which is 1/2), B9 to B12
308 |			|p - (exp(X)-1)| < |X| 2^(-70.6)
309 |		 for all |X| <= 0.251.
312 |		 as	X + ( S*B1 +	Q ) where S = X*X and
313 |			Q	=	X*S*(B2 + X*(B3 + ... + X*B12))
316 |			Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] +
319 |	Step 10.	Calculate exp(X)-1 for |X| >= 70 log 2.
320 |		10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all practical
322 |		10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical purposes.
325 |		 Return ans := ans + 2^(-126). Exit.
337 |setox	idnt	2,1 | Motorola 040 Floating Point Software Package
450 |--entry point for EXP(X), X is denormalized
453 	oril		#0x00800000,%d0		| ...sign(X)*2^(-126)
462 |--entry point for EXP(X), here X is finite, non-zero, and not NaN's
465 	movel		(%a0),%d0	 | ...load part of input X
466 	andil		#0x7FFF0000,%d0	| ...biased expo. of X
467 	cmpil		#0x3FBE0000,%d0	| ...2^(-65)
472 |--The case |X| >= 2^(-65)
473 	movew		4(%a0),%d0	| ...expo. and partial sig. of |X|
479 |--Step 2.
480 |--This is the normal branch:	2^(-65) <= |X| < 16380 log2.
484 	fmuls		#0x42B8AA3B,%fp0	| ...64/log2 * X
487 	fmovel		%fp0,%d0		| ...N = int( X * 64/log2 )
494 	addal		%d0,%a1		| ...address of 2^(J/64)
497 	addiw		#0x3FFF,%d0	| ...biased expo. of 2^(M)
502 |--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
503 |--a0 points to 2^(J/64), D0 is biased expo. of 2^(M)
507 	faddx		%fp1,%fp0		| ...X + N*L1
521 |	MOVE.W		#0,2(%a1)	...load 2^(J/64) in cache
531 	movew		%d0,SCALE(%a6)	| ...SCALE is 2^(M) in extended
532 	clrw		SCALE+2(%a6)
545 	fmovex		(%a1)+,%fp1	| ...fp1 is lead. pt. of 2^(J/64)
551 |--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
553 	fmulx		%fp1,%fp0		| ...2^(J/64)*(Exp(R)-1)
555 	fadds		(%a1),%fp0	| ...accurate 2^(J/64)
557 	faddx		%fp1,%fp0		| ...2^(J/64) + 2^(J/64)*...
567 	fmulx		SCALE(%a6),%fp0	| ...multiply 2^(M)
572 	fmovemx	(%a0),%fp0-%fp0	| ...in case X is denormalized
574 	fadds		#0x3F800000,%fp0	| ...1+X in user mode
585 	fmuls		#0x42B8AA3B,%fp0	| ...64/log2 * X
588 	fmovel		%fp0,%d0		| ...N = int( X * 64/log2 )
594 	addal		%d0,%a1			| ...address of 2^(J/64)
600 	addiw		#0x3FFF,%d0		| ...biased expo. of 2^(M1)
601 	movew		%d0,ADJSCALE(%a6)		| ...ADJSCALE := 2^(M1)
602 	clrw		ADJSCALE+2(%a6)
606 	addiw		#0x3FFF,%d0		| ...biased expo. of 2^(M)
620 |--entry point for EXPM1(X), here X is denormalized
627 |--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
631 	movel		(%a0),%d0	 | ...load part of input X
632 	andil		#0x7FFF0000,%d0	| ...biased expo. of X
634 	bges		EM1CON1	 | ...|X| >= 1/4
639 |--The case |X| >= 1/4
640 	movew		4(%a0),%d0	| ...expo. and partial sig. of |X|
642 	bles		EM1MAIN	 | ...1/4 <= |X| <= 70log2
646 |--Step 2.
647 |--This is the case:	1/4 <= |X| <= 70 log2.
651 	fmuls		#0x42B8AA3B,%fp0	| ...64/log2 * X
654 	fmovel		%fp0,%d0		| ...N = int( X * 64/log2 )
661 	addal		%d0,%a1			| ...address of 2^(J/64)
668 |--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
669 |--a0 points to 2^(J/64), D0 and a1 both contain M
673 	faddx		%fp1,%fp0	 | ...X + N*L1
676 	addiw		#0x3FFF,%d0		| ...D0 is biased expo. of 2^M
688 |	MOVE.W		#0,2(%a1)	...load 2^(J/64) in cache
696 	movew		%d0,SC(%a6)		| ...SC is 2^(M) in extended
697 	clrw		SC+2(%a6)
705 	addiw		#0x3FFF,%d0	| ...biased expo. of 2^(-M)
710 	oriw		#0x8000,%d0	| ...signed/expo. of -2^(-M)
711 	movew		%d0,ONEBYSC(%a6)	| ...OnebySc is -2^(-M)
712 	clrw		ONEBYSC+2(%a6)
727 |--Compute 2^(J/64)*p
729 	fmulx		(%a1),%fp0	| ...2^(J/64)*(Exp(R)-1)
767 |--Step 7	|X| < 1/4.
768 	cmpil		#0x3FBE0000,%d0	| ...2^(-65)
772 |--Step 8	|X| < 2^(-65)
773 	cmpil		#0x00330000,%d0	| ...2^(-16312)
776 	movel		#0x80010000,SC(%a6)	| ...SC is -2^(-16382)
799 |--Step 9	exp(X)-1 by a simple polynomial
800 	fmovex		(%a0),%fp0	| ...fp0 is X
801 	fmulx		%fp0,%fp0		| ...fp0 is S := X*X
836 	fmulx		(%a0),%fp1	| ...fp1 is X*S*(B2...
853 |--Step 10	|X| > 70 log2
860 	fadds		#0x00800000,%fp0	| ...-1 + 2^(-126)