bigbiff | 673c7ae | 2020-12-02 19:44:56 -0500 | [diff] [blame] | 1 | /* libs/pixelflinger/fixed.cpp |
| 2 | ** |
| 3 | ** Copyright 2006, The Android Open Source Project |
| 4 | ** |
| 5 | ** Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | ** you may not use this file except in compliance with the License. |
| 7 | ** You may obtain a copy of the License at |
| 8 | ** |
| 9 | ** http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | ** |
| 11 | ** Unless required by applicable law or agreed to in writing, software |
| 12 | ** distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | ** See the License for the specific language governing permissions and |
| 15 | ** limitations under the License. |
| 16 | */ |
| 17 | |
| 18 | #include <stdio.h> |
| 19 | |
| 20 | #include <private/pixelflinger/ggl_context.h> |
| 21 | #include <private/pixelflinger/ggl_fixed.h> |
| 22 | |
| 23 | |
| 24 | // ------------------------------------------------------------------------ |
| 25 | |
| 26 | int32_t gglRecipQNormalized(int32_t x, int* exponent) |
| 27 | { |
| 28 | const int32_t s = x>>31; |
| 29 | uint32_t a = s ? -x : x; |
| 30 | |
| 31 | // the result will overflow, so just set it to the biggest/inf value |
| 32 | if (ggl_unlikely(a <= 2LU)) { |
| 33 | *exponent = 0; |
| 34 | return s ? FIXED_MIN : FIXED_MAX; |
| 35 | } |
| 36 | |
| 37 | // Newton-Raphson iteration: |
| 38 | // x = r*(2 - a*r) |
| 39 | |
| 40 | const int32_t lz = gglClz(a); |
| 41 | a <<= lz; // 0.32 |
| 42 | uint32_t r = a; |
| 43 | // note: if a == 0x80000000, this means x was a power-of-2, in this |
| 44 | // case we don't need to compute anything. We get the reciprocal for |
| 45 | // (almost) free. |
| 46 | if (a != 0x80000000) { |
| 47 | r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a |
| 48 | // 0.32 + 2.30 = 2.62 -> 2.30 |
| 49 | // 2.30 + 2.30 = 4.60 -> 2.30 |
| 50 | r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30; |
| 51 | r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30; |
| 52 | } |
| 53 | |
| 54 | // shift right 1-bit to make room for the sign bit |
| 55 | *exponent = 30-lz-1; |
| 56 | r >>= 1; |
| 57 | return s ? -r : r; |
| 58 | } |
| 59 | |
| 60 | int32_t gglRecipQ(GGLfixed x, int q) |
| 61 | { |
| 62 | int shift; |
| 63 | x = gglRecipQNormalized(x, &shift); |
| 64 | shift += 16-q; |
| 65 | if (shift > 0) |
| 66 | x += 1L << (shift-1); // rounding |
| 67 | x >>= shift; |
| 68 | return x; |
| 69 | } |
| 70 | |
| 71 | // ------------------------------------------------------------------------ |
| 72 | |
| 73 | static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = { |
| 74 | // 1/sqrt(x) with x = 1-N/16, N=[8...1] |
| 75 | 0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865 |
| 76 | }; |
| 77 | |
| 78 | GGLfixed gglSqrtRecipx(GGLfixed x) |
| 79 | { |
| 80 | if (x == 0) return FIXED_MAX; |
| 81 | if (x == FIXED_ONE) return x; |
| 82 | const GGLfixed a = x; |
| 83 | const int32_t lz = gglClz(x); |
| 84 | x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7]; |
| 85 | const int32_t exp = lz - 16; |
| 86 | if (exp <= 0) x >>= -exp>>1; |
| 87 | else x <<= (exp>>1) + (exp & 1); |
| 88 | if (exp & 1) { |
| 89 | x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1; |
| 90 | } |
| 91 | // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x) |
| 92 | x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x))); |
| 93 | x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x))); |
| 94 | return x; |
| 95 | } |
| 96 | |
| 97 | GGLfixed gglSqrtx(GGLfixed a) |
| 98 | { |
| 99 | // Compute a full precision square-root (24 bits accuracy) |
| 100 | GGLfixed r = 0; |
| 101 | GGLfixed bit = 0x800000; |
| 102 | int32_t bshift = 15; |
| 103 | do { |
| 104 | GGLfixed temp = bit + (r<<1); |
| 105 | if (bshift >= 8) temp <<= (bshift-8); |
| 106 | else temp >>= (8-bshift); |
| 107 | if (a >= temp) { |
| 108 | r += bit; |
| 109 | a -= temp; |
| 110 | } |
| 111 | bshift--; |
| 112 | } while (bit>>=1); |
| 113 | return r; |
| 114 | } |
| 115 | |
| 116 | // ------------------------------------------------------------------------ |
| 117 | |
| 118 | static const GGLfixed ggl_log_approx_tab[] = { |
| 119 | // -ln(x)/ln(2) with x = N/16, N=[8...16] |
| 120 | 0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000 |
| 121 | }; |
| 122 | |
| 123 | static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0] |
| 124 | 0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000 |
| 125 | }; |
| 126 | |
| 127 | GGLfixed gglPowx(GGLfixed x, GGLfixed y) |
| 128 | { |
| 129 | // prerequisite: 0 <= x <= 1, and y >=0 |
| 130 | |
| 131 | // pow(x,y) = 2^(y*log2(x)) |
| 132 | // = 2^(y*log2(x*(2^exp)*(2^-exp)))) |
| 133 | // = 2^(y*(log2(X)-exp)) |
| 134 | // = 2^(log2(X)*y - y*exp) |
| 135 | // = 2^( - (-log2(X)*y + y*exp) ) |
| 136 | |
| 137 | int32_t exp = gglClz(x) - 16; |
| 138 | GGLfixed f = x << exp; |
| 139 | x = (f & 0x0FFF)<<4; |
| 140 | f = (f >> 12) & 0x7; |
| 141 | GGLfixed p = gglMulAddx( |
| 142 | ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x, |
| 143 | ggl_log_approx_tab[f]); |
| 144 | p = gglMulAddx(p, y, y*exp); |
| 145 | exp = gglFixedToIntFloor(p); |
| 146 | if (exp < 31) { |
| 147 | p = gglFracx(p); |
| 148 | x = (p & 0x1FFF)<<3; |
| 149 | p >>= 13; |
| 150 | p = gglMulAddx( |
| 151 | ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x, |
| 152 | ggl_alog_approx_tab[p]); |
| 153 | p >>= exp; |
| 154 | } else { |
| 155 | p = 0; |
| 156 | } |
| 157 | return p; |
| 158 | // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f; |
| 159 | } |
| 160 | |
| 161 | // ------------------------------------------------------------------------ |
| 162 | |
| 163 | int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) |
| 164 | { |
| 165 | //int32_t r =int32_t((int64_t(n)<<i)/d); |
| 166 | const int32_t ds = n^d; |
| 167 | if (n<0) n = -n; |
| 168 | if (d<0) d = -d; |
| 169 | int nd = gglClz(d) - gglClz(n); |
| 170 | i += nd + 1; |
| 171 | if (nd > 0) d <<= nd; |
| 172 | else n <<= -nd; |
| 173 | uint32_t q = 0; |
| 174 | |
| 175 | int j = i & 7; |
| 176 | i >>= 3; |
| 177 | |
| 178 | // gcc deals with the code below pretty well. |
| 179 | // we get 3.75 cycles per bit in the main loop |
| 180 | // and 8 cycles per bit in the termination loop |
| 181 | if (ggl_likely(i)) { |
| 182 | n -= d; |
| 183 | do { |
| 184 | q <<= 8; |
| 185 | if (n>=0) q |= 128; |
| 186 | else n += d; |
| 187 | n = n*2 - d; |
| 188 | if (n>=0) q |= 64; |
| 189 | else n += d; |
| 190 | n = n*2 - d; |
| 191 | if (n>=0) q |= 32; |
| 192 | else n += d; |
| 193 | n = n*2 - d; |
| 194 | if (n>=0) q |= 16; |
| 195 | else n += d; |
| 196 | n = n*2 - d; |
| 197 | if (n>=0) q |= 8; |
| 198 | else n += d; |
| 199 | n = n*2 - d; |
| 200 | if (n>=0) q |= 4; |
| 201 | else n += d; |
| 202 | n = n*2 - d; |
| 203 | if (n>=0) q |= 2; |
| 204 | else n += d; |
| 205 | n = n*2 - d; |
| 206 | if (n>=0) q |= 1; |
| 207 | else n += d; |
| 208 | |
| 209 | if (--i == 0) |
| 210 | goto finish; |
| 211 | |
| 212 | n = n*2 - d; |
| 213 | } while(true); |
| 214 | do { |
| 215 | q <<= 1; |
| 216 | n = n*2 - d; |
| 217 | if (n>=0) q |= 1; |
| 218 | else n += d; |
| 219 | finish: ; |
| 220 | } while (j--); |
| 221 | return (ds<0) ? -q : q; |
| 222 | } |
| 223 | |
| 224 | n -= d; |
| 225 | if (n>=0) q |= 1; |
| 226 | else n += d; |
| 227 | j--; |
| 228 | goto finish; |
| 229 | } |
| 230 | |
| 231 | // ------------------------------------------------------------------------ |
| 232 | |
| 233 | // assumes that the int32_t values of a, b, and c are all positive |
| 234 | // use when both a and b are larger than c |
| 235 | |
| 236 | template <typename T> |
| 237 | static inline void swap(T& a, T& b) { |
| 238 | T t(a); |
| 239 | a = b; |
| 240 | b = t; |
| 241 | } |
| 242 | |
| 243 | static __attribute__((noinline)) |
| 244 | int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c) |
| 245 | { |
| 246 | // first we compute a*b as a 64-bit integer |
| 247 | // (GCC generates umull with the code below) |
| 248 | uint64_t ab = uint64_t(a)*b; |
| 249 | uint32_t hi = ab>>32; |
| 250 | uint32_t lo = ab; |
| 251 | uint32_t result; |
| 252 | |
| 253 | // now perform the division |
| 254 | if (hi >= c) { |
| 255 | overflow: |
| 256 | result = 0x7fffffff; // basic overflow |
| 257 | } else if (hi == 0) { |
| 258 | result = lo/c; // note: c can't be 0 |
| 259 | if ((result >> 31) != 0) // result must fit in 31 bits |
| 260 | goto overflow; |
| 261 | } else { |
| 262 | uint32_t r = hi; |
| 263 | int bits = 31; |
| 264 | result = 0; |
| 265 | do { |
| 266 | r = (r << 1) | (lo >> 31); |
| 267 | lo <<= 1; |
| 268 | result <<= 1; |
| 269 | if (r >= c) { |
| 270 | r -= c; |
| 271 | result |= 1; |
| 272 | } |
| 273 | } while (bits--); |
| 274 | } |
| 275 | return int32_t(result); |
| 276 | } |
| 277 | |
| 278 | // assumes a >= 0 and c >= b >= 0 |
| 279 | static inline |
| 280 | int32_t quick_muldiv(int32_t a, int32_t b, int32_t c) |
| 281 | { |
| 282 | int32_t r = 0, q = 0, i; |
| 283 | int leading = gglClz(a); |
| 284 | i = 32 - leading; |
| 285 | a <<= leading; |
| 286 | do { |
| 287 | r <<= 1; |
| 288 | if (a < 0) |
| 289 | r += b; |
| 290 | a <<= 1; |
| 291 | q <<= 1; |
| 292 | if (r >= c) { |
| 293 | r -= c; |
| 294 | q++; |
| 295 | } |
| 296 | asm(""::); // gcc generates better code this way |
| 297 | if (r >= c) { |
| 298 | r -= c; |
| 299 | q++; |
| 300 | } |
| 301 | } |
| 302 | while (--i); |
| 303 | return q; |
| 304 | } |
| 305 | |
| 306 | // this function computes a*b/c with 64-bit intermediate accuracy |
| 307 | // overflows (e.g. division by 0) are handled and return INT_MAX |
| 308 | |
| 309 | int32_t gglMulDivi(int32_t a, int32_t b, int32_t c) |
| 310 | { |
| 311 | int32_t result; |
| 312 | int32_t sign = a^b^c; |
| 313 | |
| 314 | if (a < 0) a = -a; |
| 315 | if (b < 0) b = -b; |
| 316 | if (c < 0) c = -c; |
| 317 | |
| 318 | if (a < b) { |
| 319 | swap(a, b); |
| 320 | } |
| 321 | |
| 322 | if (b <= c) result = quick_muldiv(a, b, c); |
| 323 | else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c); |
| 324 | |
| 325 | if (sign < 0) |
| 326 | result = -result; |
| 327 | |
| 328 | return result; |
| 329 | } |