/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "pixelflinger-code"

#include <assert.h>
#include <stdio.h>

#include <log/log.h>

#include "GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------

void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK) ? 1 : 0;
    switch (bits) {
    case 32:
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and are used only for RGB:
        // 0x00BBGGRR is unpacked as R,G,B
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}
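
// Example (illustrative only, with hypothetical registers s.reg = r0 and
// addr.reg = r1, WRITE_BACK set, and s CORRUPTIBLE): the 24-bit path above
// emits roughly
//
//     strb r0, [r1, #0]       @ store R (bits 7:0)
//     mov  r0, r0, ror #8     @ rotate G into bits 7:0
//     strb r0, [r1, #1]       @ store G
//     mov  r0, r0, ror #8     @ rotate B into bits 7:0
//     strb r0, [r1, #2]       @ store B
//     add  r1, r1, #3         @ advance the pointer (WRITE_BACK)
//
// When s is not CORRUPTIBLE, an extra "mov r0, r0, ror #16" restores the
// original packed value before the pointer update.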

void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK) ? 1 : 0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and are used only for RGB:
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}
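
// Example (illustrative only, with hypothetical registers s.reg = r0,
// addr.reg = r1, scratch s0 = r2, no WRITE_BACK): the 24-bit path with
// s.reg != addr.reg emits roughly
//
//     ldrb r0, [r1, #0]            @ R
//     ldrb r2, [r1, #1]            @ G
//     orr  r0, r0, r2, lsl #8
//     ldrb r2, [r1, #2]            @ B
//     orr  r0, r0, r2, lsl #16     @ r0 = 0x00BBGGRR
//
// The second branch accumulates into an extra scratch register instead,
// because loading straight into s.reg would clobber the address register
// before the remaining bytes are read.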

void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

#ifdef __mips__
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }

    d.s = maskLen;
}
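
// Worked example (illustrative): extracting green from RGB565, i.e.
// h=11, l=5, bits=16. maskLen is 6 and the mask 0x7E0 is a valid ARM
// immediate, so the code above emits
//
//     and d, s, #0x7E0     @ isolate bits 10:5
//     mov d, d, lsr #5     @ right-justify the component
//
// leaving the 6-bit green value in d, with d.s = 6.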

void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
{
    extract(d,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
}

void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    integer_t r(d.reg, 32, d.flags);
    extract(r,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
    d = component_t(r);
}


void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
{
    if (s.l || (s.flags & CLEAR_HI)) {
        extract(d, s.reg, s.h, s.l, 32);
        expand(d, d, dbits);
    } else {
        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    }
}

void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    integer_t r(d.reg, 32, d.flags);
    expand(r, s, dbits);
    d = component_t(r);
}

void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits <= sbits) {
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
        // d = (s<<dbits) - s;
        return;
    }
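
    // (Worked example for the 1-bit case above: with dbits=8 the RSB
    // computes d = (s<<8) - s, so s=1 expands to 0xFF and s=0 to 0x00.)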

    if (dbits % sbits) {
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
        // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
            // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while (dbits > 0);
        return;
    }

    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
        // d = s | (s << sbits);
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while (dbits > 0);
}
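
// Worked example (illustrative): expanding a 5-bit component to 8 bits
// (sbits=5, dbits=8). Since 8 % 5 != 0, the first loop above runs and
// emits
//
//     mov d, s, lsl #3     @ d = s << 3
//     orr d, d, d, lsr #5  @ d = (s<<3) | (s>>2)
//
// which replicates the top source bits into the new low bits, mapping
// 0x00 -> 0x00 and 0x1F -> 0xFF: the classic 5-to-8-bit expansion.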

void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    int maskHiBits = (sh != 32) ? ((s.flags & CLEAR_HI) ? 1 : 0) : 0;
    int maskLoBits = (sl != 0)  ? ((s.flags & CLEAR_LO) ? 1 : 0) : 0;
    int sbits = sh - sl;

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;
    int dithering = 0;

    ALOGE_IF(sbits < dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits > dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE) {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the low bits too)
        // and we might be able to use an immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits <= 8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special-case (maskHiBits && !maskLoBits) like we do
    // for maskLoBits below, but it is rare to have maskHiBits alone
    // together with the conditions needed to produce better code
    // (like doing d |= s << 24)

    if (maskHiBits) {
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    // Downsampling should be performed as follows:
    //   V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //   V * [(1<<dbits)/((1<<sbits)-1)  -  1/((1<<sbits)-1)]
    //   V * [1/((1<<sbits)-1)>>dbits    -  1/((1<<sbits)-1)]
    //   V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
    //   V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
    //
    // By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //   V>>(sbits-dbits) - V>>sbits
    //
    // A good approximation is V>>(sbits-dbits),
    // but a better one (needed for dithering) is:
    //
    //   (V>>(sbits-dbits)<<sbits - V)>>sbits
    //   (V<<dbits - V)>>sbits
    //   (V - V>>dbits)>>(sbits-dbits)
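    //
    // Numeric check (illustrative): with sbits=8 and dbits=5 the last
    // form is (V - V>>5)>>3, so V=255 gives (255-7)>>3 = 31 and V=0
    // gives 0: full scale maps to full scale, before the dither offset
    // is added below.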

    // Dithering is done here
    if (dithering) {
        comment("dithering");
        if (sl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift > 0)      ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift < 0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL, -shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

    if ((maskLoBits | dithering) && (sh > dbits)) {
        int shift = sh - dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        int shift = sh - dh;
        if (shift > 0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift < 0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}

}; // namespace android