blob: de6b4794ff50fd417cf02e2c1c2c20ef28fd4456 [file] [log] [blame]
bigbiff673c7ae2020-12-02 19:44:56 -05001/* libs/pixelflinger/fixed.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9** http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19
20#include <private/pixelflinger/ggl_context.h>
21#include <private/pixelflinger/ggl_fixed.h>
22
23
24// ------------------------------------------------------------------------
25
26int32_t gglRecipQNormalized(int32_t x, int* exponent)
27{
28 const int32_t s = x>>31;
29 uint32_t a = s ? -x : x;
30
31 // the result will overflow, so just set it to the biggest/inf value
32 if (ggl_unlikely(a <= 2LU)) {
33 *exponent = 0;
34 return s ? FIXED_MIN : FIXED_MAX;
35 }
36
37 // Newton-Raphson iteration:
38 // x = r*(2 - a*r)
39
40 const int32_t lz = gglClz(a);
41 a <<= lz; // 0.32
42 uint32_t r = a;
43 // note: if a == 0x80000000, this means x was a power-of-2, in this
44 // case we don't need to compute anything. We get the reciprocal for
45 // (almost) free.
46 if (a != 0x80000000) {
47 r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
48 // 0.32 + 2.30 = 2.62 -> 2.30
49 // 2.30 + 2.30 = 4.60 -> 2.30
50 r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
51 r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
52 }
53
54 // shift right 1-bit to make room for the sign bit
55 *exponent = 30-lz-1;
56 r >>= 1;
57 return s ? -r : r;
58}
59
60int32_t gglRecipQ(GGLfixed x, int q)
61{
62 int shift;
63 x = gglRecipQNormalized(x, &shift);
64 shift += 16-q;
65 if (shift > 0)
66 x += 1L << (shift-1); // rounding
67 x >>= shift;
68 return x;
69}
70
71// ------------------------------------------------------------------------
72
73static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
74 // 1/sqrt(x) with x = 1-N/16, N=[8...1]
75 0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
76};
77
78GGLfixed gglSqrtRecipx(GGLfixed x)
79{
80 if (x == 0) return FIXED_MAX;
81 if (x == FIXED_ONE) return x;
82 const GGLfixed a = x;
83 const int32_t lz = gglClz(x);
84 x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
85 const int32_t exp = lz - 16;
86 if (exp <= 0) x >>= -exp>>1;
87 else x <<= (exp>>1) + (exp & 1);
88 if (exp & 1) {
89 x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
90 }
91 // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x)
92 x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
93 x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
94 return x;
95}
96
97GGLfixed gglSqrtx(GGLfixed a)
98{
99 // Compute a full precision square-root (24 bits accuracy)
100 GGLfixed r = 0;
101 GGLfixed bit = 0x800000;
102 int32_t bshift = 15;
103 do {
104 GGLfixed temp = bit + (r<<1);
105 if (bshift >= 8) temp <<= (bshift-8);
106 else temp >>= (8-bshift);
107 if (a >= temp) {
108 r += bit;
109 a -= temp;
110 }
111 bshift--;
112 } while (bit>>=1);
113 return r;
114}
115
116// ------------------------------------------------------------------------
117
118static const GGLfixed ggl_log_approx_tab[] = {
119 // -ln(x)/ln(2) with x = N/16, N=[8...16]
120 0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
121};
122
123static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
124 0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
125};
126
127GGLfixed gglPowx(GGLfixed x, GGLfixed y)
128{
129 // prerequisite: 0 <= x <= 1, and y >=0
130
131 // pow(x,y) = 2^(y*log2(x))
132 // = 2^(y*log2(x*(2^exp)*(2^-exp))))
133 // = 2^(y*(log2(X)-exp))
134 // = 2^(log2(X)*y - y*exp)
135 // = 2^( - (-log2(X)*y + y*exp) )
136
137 int32_t exp = gglClz(x) - 16;
138 GGLfixed f = x << exp;
139 x = (f & 0x0FFF)<<4;
140 f = (f >> 12) & 0x7;
141 GGLfixed p = gglMulAddx(
142 ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
143 ggl_log_approx_tab[f]);
144 p = gglMulAddx(p, y, y*exp);
145 exp = gglFixedToIntFloor(p);
146 if (exp < 31) {
147 p = gglFracx(p);
148 x = (p & 0x1FFF)<<3;
149 p >>= 13;
150 p = gglMulAddx(
151 ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
152 ggl_alog_approx_tab[p]);
153 p >>= exp;
154 } else {
155 p = 0;
156 }
157 return p;
158 // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f;
159}
160
161// ------------------------------------------------------------------------
162
163int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
164{
165 //int32_t r =int32_t((int64_t(n)<<i)/d);
166 const int32_t ds = n^d;
167 if (n<0) n = -n;
168 if (d<0) d = -d;
169 int nd = gglClz(d) - gglClz(n);
170 i += nd + 1;
171 if (nd > 0) d <<= nd;
172 else n <<= -nd;
173 uint32_t q = 0;
174
175 int j = i & 7;
176 i >>= 3;
177
178 // gcc deals with the code below pretty well.
179 // we get 3.75 cycles per bit in the main loop
180 // and 8 cycles per bit in the termination loop
181 if (ggl_likely(i)) {
182 n -= d;
183 do {
184 q <<= 8;
185 if (n>=0) q |= 128;
186 else n += d;
187 n = n*2 - d;
188 if (n>=0) q |= 64;
189 else n += d;
190 n = n*2 - d;
191 if (n>=0) q |= 32;
192 else n += d;
193 n = n*2 - d;
194 if (n>=0) q |= 16;
195 else n += d;
196 n = n*2 - d;
197 if (n>=0) q |= 8;
198 else n += d;
199 n = n*2 - d;
200 if (n>=0) q |= 4;
201 else n += d;
202 n = n*2 - d;
203 if (n>=0) q |= 2;
204 else n += d;
205 n = n*2 - d;
206 if (n>=0) q |= 1;
207 else n += d;
208
209 if (--i == 0)
210 goto finish;
211
212 n = n*2 - d;
213 } while(true);
214 do {
215 q <<= 1;
216 n = n*2 - d;
217 if (n>=0) q |= 1;
218 else n += d;
219 finish: ;
220 } while (j--);
221 return (ds<0) ? -q : q;
222 }
223
224 n -= d;
225 if (n>=0) q |= 1;
226 else n += d;
227 j--;
228 goto finish;
229}
230
231// ------------------------------------------------------------------------
232
233// assumes that the int32_t values of a, b, and c are all positive
234// use when both a and b are larger than c
235
236template <typename T>
237static inline void swap(T& a, T& b) {
238 T t(a);
239 a = b;
240 b = t;
241}
242
243static __attribute__((noinline))
244int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
245{
246 // first we compute a*b as a 64-bit integer
247 // (GCC generates umull with the code below)
248 uint64_t ab = uint64_t(a)*b;
249 uint32_t hi = ab>>32;
250 uint32_t lo = ab;
251 uint32_t result;
252
253 // now perform the division
254 if (hi >= c) {
255 overflow:
256 result = 0x7fffffff; // basic overflow
257 } else if (hi == 0) {
258 result = lo/c; // note: c can't be 0
259 if ((result >> 31) != 0) // result must fit in 31 bits
260 goto overflow;
261 } else {
262 uint32_t r = hi;
263 int bits = 31;
264 result = 0;
265 do {
266 r = (r << 1) | (lo >> 31);
267 lo <<= 1;
268 result <<= 1;
269 if (r >= c) {
270 r -= c;
271 result |= 1;
272 }
273 } while (bits--);
274 }
275 return int32_t(result);
276}
277
278// assumes a >= 0 and c >= b >= 0
279static inline
280int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
281{
282 int32_t r = 0, q = 0, i;
283 int leading = gglClz(a);
284 i = 32 - leading;
285 a <<= leading;
286 do {
287 r <<= 1;
288 if (a < 0)
289 r += b;
290 a <<= 1;
291 q <<= 1;
292 if (r >= c) {
293 r -= c;
294 q++;
295 }
296 asm(""::); // gcc generates better code this way
297 if (r >= c) {
298 r -= c;
299 q++;
300 }
301 }
302 while (--i);
303 return q;
304}
305
306// this function computes a*b/c with 64-bit intermediate accuracy
307// overflows (e.g. division by 0) are handled and return INT_MAX
308
309int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
310{
311 int32_t result;
312 int32_t sign = a^b^c;
313
314 if (a < 0) a = -a;
315 if (b < 0) b = -b;
316 if (c < 0) c = -c;
317
318 if (a < b) {
319 swap(a, b);
320 }
321
322 if (b <= c) result = quick_muldiv(a, b, c);
323 else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);
324
325 if (sign < 0)
326 result = -result;
327
328 return result;
329}