Merge tag 'android-11.0.0_r16' into android-10.0
Android 11.0.0 release 16 - twrp bringup patch
diff --git a/libpixelflinger/t32cb16blend.S b/libpixelflinger/t32cb16blend.S
new file mode 100644
index 0000000..5e4995a
--- /dev/null
+++ b/libpixelflinger/t32cb16blend.S
@@ -0,0 +1,203 @@
+/* libs/pixelflinger/t32cb16blend.S
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+
+ .text // emit into the code section
+ .syntax unified // unified syntax: condition goes last in the mnemonic (ldmfdlo, orrhs)
+ .balign 4 // align what follows on a 4-byte boundary
+
+ .global scanline_t32cb16blend_arm // export the entry point defined below
+
+
+/*
+ * .macro pixel
+ *
+ * Blends one source pixel over one RGB565 destination pixel. Per channel:
+ * out = min(channel_max, s + ((d * (0x100 - a256)) >> 8)), where
+ * a256 = sA + (sA >> 7) stretches the 0..255 source alpha to 0..256.
+ *
+ * \DREG is a 32-bit register containing *two* original destination RGB565
+ * pixels, with the even one in the low-16 bits, and the odd one in the
+ * high 16 bits.
+ * \SRC is a 32-bit 0xAABBGGRR pixel value, with pre-multiplied colors.
+ * \FB is the target register. With ODD=0 it is overwritten with the low
+ * pixel; with ODD=1 the high pixel is only ORed in, so expand ODD=0 first.
+ * \ODD is either 0 or 1 and indicates if we're blending the lower or
+ * upper 16-bit pixels in DREG into FB
+ *
+ * clobbered: r6, r7, lr, condition flags
+ */
+
+.macro pixel, DREG, SRC, FB, ODD
+
+ // SRC = 0xAABBGGRR
+ mov r7, \SRC, lsr #24 // sA
+ add r7, r7, r7, lsr #7 // sA + (sA >> 7)
+ rsb r7, r7, #0x100 // sA = 0x100 - (sA+(sA>>7))
+
+1: // NOTE(review): no branch to this local label is visible in this file
+
+.if \ODD
+
+ // red
+ mov lr, \DREG, lsr #(16 + 11) // dR: top 5 bits of the upper pixel
+ smulbb lr, r7, lr // inverse alpha * dR
+ mov r6, \SRC, lsr #3
+ and r6, r6, #0x1F // sR reduced to its top 5 bits
+ add lr, r6, lr, lsr #8 // sR + ((dR * inverse alpha) >> 8)
+ cmp lr, #0x1F // saturate at the 5-bit maximum
+ orrhs \FB, \FB, #(0x1F<<(16 + 11))
+ orrlo \FB, \FB, lr, lsl #(16 + 11)
+
+ // green
+ and r6, \DREG, #(0x3F<<(16 + 5)) // dG left in place (bits 21..26)
+ smulbt r6, r7, r6 // inverse alpha * top half of r6, i.e. dG << 5
+ mov lr, \SRC, lsr #(8+2)
+ and lr, lr, #0x3F // sG reduced to its top 6 bits
+ add r6, lr, r6, lsr #(5+8) // undo the << 5, then the usual >> 8
+ cmp r6, #0x3F // saturate at the 6-bit maximum
+ orrhs \FB, \FB, #(0x3F<<(16 + 5))
+ orrlo \FB, \FB, r6, lsl #(16 + 5)
+
+ // blue
+ and lr, \DREG, #(0x1F << 16) // dB left in place (bits 16..20)
+ smulbt lr, r7, lr // inverse alpha * top half of lr, i.e. dB
+ mov r6, \SRC, lsr #(8+8+3)
+ and r6, r6, #0x1F // sB reduced to its top 5 bits
+ add lr, r6, lr, lsr #8 // sB + ((dB * inverse alpha) >> 8)
+ cmp lr, #0x1F // saturate at the 5-bit maximum
+ orrhs \FB, \FB, #(0x1F << 16)
+ orrlo \FB, \FB, lr, lsl #16
+
+.else
+
+ // red
+ mov lr, \DREG, lsr #11
+ and lr, lr, #0x1F // dR of the lower pixel; mask drops the upper pixel's bits
+ smulbb lr, r7, lr // inverse alpha * dR
+ mov r6, \SRC, lsr #3
+ and r6, r6, #0x1F // sR reduced to its top 5 bits
+ add lr, r6, lr, lsr #8 // sR + ((dR * inverse alpha) >> 8)
+ cmp lr, #0x1F // saturate at the 5-bit maximum
+ movhs \FB, #(0x1F<<11) // mov, not orr: this starts a fresh FB value
+ movlo \FB, lr, lsl #11
+
+
+ // green
+ and r6, \DREG, #(0x3F<<5) // dG left in place (bits 5..10)
+ smulbb r6, r7, r6 // inverse alpha * (dG << 5)
+ mov lr, \SRC, lsr #(8+2)
+ and lr, lr, #0x3F // sG reduced to its top 6 bits
+ add r6, lr, r6, lsr #(5+8) // undo the << 5, then the usual >> 8
+ cmp r6, #0x3F // saturate at the 6-bit maximum
+ orrhs \FB, \FB, #(0x3F<<5)
+ orrlo \FB, \FB, r6, lsl #5
+
+ // blue
+ and lr, \DREG, #0x1F // dB (bits 0..4)
+ smulbb lr, r7, lr // inverse alpha * dB
+ mov r6, \SRC, lsr #(8+8+3)
+ and r6, r6, #0x1F // sB reduced to its top 5 bits
+ add lr, r6, lr, lsr #8 // sB + ((dB * inverse alpha) >> 8)
+ cmp lr, #0x1F // saturate at the 5-bit maximum
+ orrhs \FB, \FB, #0x1F
+ orrlo \FB, \FB, lr
+
+.endif
+
+ .endm
+
+
+// scanline_t32cb16blend_arm: blends r2 32-bit source pixels over the
+// 16-bit destination pixels in place, one or two at a time via the pixel macro.
+// r0: dst ptr (16-bit pixels)
+// r1: src ptr (32-bit pixels)
+// r2: count
+// r3: d (destination pixel, or pixel pair in the main loop)
+// r4: s0
+// r5: s1
+// r6: pixel (macro scratch)
+// r7: pixel (macro scratch)
+// r8-r11: free
+// r12: blended result built by the pixel macro, then stored to dst
+// r14: pixel (macro scratch)
+// r4-r7 and lr are pushed on entry and popped again before each return.
+
+scanline_t32cb16blend_arm:
+ stmfd sp!, {r4-r7, lr}
+
+ pld [r0]
+ pld [r1]
+
+ // align DST to 32 bits
+ tst r0, #0x3
+ beq aligned
+ subs r2, r2, #1 // account for the leading pixel handled at last
+ ldmfdlo sp!, {r4-r7, lr} // return
+ bxlo lr
+
+last: // blend exactly one pixel using a 16-bit load/store
+ ldr r4, [r1], #4
+ ldrh r3, [r0]
+ pixel r3, r4, r12, 0
+ strh r12, [r0], #2
+
+aligned:
+ subs r2, r2, #2 // from here on r2 = pixels remaining - 2
+ blo 9f // fewer than two pixels left
+
+ // The main loop is unrolled twice and processes 4 pixels
+8: ldmia r1!, {r4, r5}
+ // stream the source
+ pld [r1, #32]
+ add r0, r0, #4 // advance first; the pair is addressed at [r0, #-4]
+ // both source pixels are zero, skip this pair
+ orrs r3, r4, r5
+ beq 7f
+
+ // load the destination
+ ldr r3, [r0, #-4]
+ // stream the destination
+ pld [r0, #32]
+ pixel r3, r4, r12, 0
+ pixel r3, r5, r12, 1
+ // effectively, we're getting write-combining by virtue of the
+ // cpu's write-back cache.
+ str r12, [r0, #-4]
+
+ // 2nd iteration of the loop, don't stream anything
+ subs r2, r2, #2
+ movlt r4, r5 // no visible effect: r4 is reloaded at last before any use
+ blt 9f
+ ldmia r1!, {r4, r5}
+ add r0, r0, #4
+ orrs r3, r4, r5
+ beq 7f
+ ldr r3, [r0, #-4]
+ pixel r3, r4, r12, 0
+ pixel r3, r5, r12, 1
+ str r12, [r0, #-4]
+
+
+7: subs r2, r2, #2
+ bhs 8b
+ mov r4, r5 // no visible effect: r4 is reloaded at last before any use
+
+9: adds r2, r2, #1 // r2 was -2 (done) or -1 (one pixel left); carry set only for -1
+ ldmfdlo sp!, {r4-r7, lr} // return
+ bxlo lr
+ b last