Blame - libpixelflinger/t32cb16blend.S - android_bootable_recovery

blob: 5e4995a20239b23a52ed6836da6a7d686eecc06b [file] [log] [blame]

bigbiff	673c7ae	2020-12-02 19:44:56 -0500	[diff] [blame]	1	/* libs/pixelflinger/t32cb16blend.S
				2	**
				3	** Copyright 2006, The Android Open Source Project
				4	**
				5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
				8	**
				9	** http://www.apache.org/licenses/LICENSE-2.0
				10	**
				11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
				15	** limitations under the License.
				16	*/
				17
				18
				19	.text
				20	.syntax unified
				21	.balign 4
				22
				23	.global scanline_t32cb16blend_arm
				24
				25
				26	/*
				27	* .macro pixel
				28	*
				29	* \DREG is a 32-bit register containing two original destination RGB565
				30	* pixels, with the even one in the low-16 bits, and the odd one in the
				31	* high 16 bits.
				32	*
				33	* \SRC is a 32-bit 0xAABBGGRR pixel value, with pre-multiplied colors.
				34	*
				35	* \FB is a target register that will contain the blended pixel values.
				36	*
				37	* \ODD is either 0 or 1 and indicates if we're blending the lower or
				38	* upper 16-bit pixels in DREG into FB
				39	*
				40	*
				41	* clobbered: r6, r7, lr
				42	*
				43	*/
				44
				45	.macro pixel, DREG, SRC, FB, ODD
				46
				47	// SRC = 0xAABBGGRR
				48	mov r7, \SRC, lsr #24 // sA
				49	add r7, r7, r7, lsr #7 // sA + (sA >> 7)
				50	rsb r7, r7, #0x100 // sA = 0x100 - (sA+(sA>>7))
				51
				52	1:
				53
				54	.if \ODD
				55
				56	// red
				57	mov lr, \DREG, lsr #(16 + 11)
				58	smulbb lr, r7, lr
				59	mov r6, \SRC, lsr #3
				60	and r6, r6, #0x1F
				61	add lr, r6, lr, lsr #8
				62	cmp lr, #0x1F
				63	orrhs \FB, \FB, #(0x1F<<(16 + 11))
				64	orrlo \FB, \FB, lr, lsl #(16 + 11)
				65
				66	// green
				67	and r6, \DREG, #(0x3F<<(16 + 5))
				68	smulbt r6, r7, r6
				69	mov lr, \SRC, lsr #(8+2)
				70	and lr, lr, #0x3F
				71	add r6, lr, r6, lsr #(5+8)
				72	cmp r6, #0x3F
				73	orrhs \FB, \FB, #(0x3F<<(16 + 5))
				74	orrlo \FB, \FB, r6, lsl #(16 + 5)
				75
				76	// blue
				77	and lr, \DREG, #(0x1F << 16)
				78	smulbt lr, r7, lr
				79	mov r6, \SRC, lsr #(8+8+3)
				80	and r6, r6, #0x1F
				81	add lr, r6, lr, lsr #8
				82	cmp lr, #0x1F
				83	orrhs \FB, \FB, #(0x1F << 16)
				84	orrlo \FB, \FB, lr, lsl #16
				85
				86	.else
				87
				88	// red
				89	mov lr, \DREG, lsr #11
				90	and lr, lr, #0x1F
				91	smulbb lr, r7, lr
				92	mov r6, \SRC, lsr #3
				93	and r6, r6, #0x1F
				94	add lr, r6, lr, lsr #8
				95	cmp lr, #0x1F
				96	movhs \FB, #(0x1F<<11)
				97	movlo \FB, lr, lsl #11
				98
				99
				100	// green
				101	and r6, \DREG, #(0x3F<<5)
				102	smulbb r6, r7, r6
				103	mov lr, \SRC, lsr #(8+2)
				104	and lr, lr, #0x3F
				105	add r6, lr, r6, lsr #(5+8)
				106	cmp r6, #0x3F
				107	orrhs \FB, \FB, #(0x3F<<5)
				108	orrlo \FB, \FB, r6, lsl #5
				109
				110	// blue
				111	and lr, \DREG, #0x1F
				112	smulbb lr, r7, lr
				113	mov r6, \SRC, lsr #(8+8+3)
				114	and r6, r6, #0x1F
				115	add lr, r6, lr, lsr #8
				116	cmp lr, #0x1F
				117	orrhs \FB, \FB, #0x1F
				118	orrlo \FB, \FB, lr
				119
				120	.endif
				121
				122	.endm
				123
				124
				125	// r0: dst ptr
				126	// r1: src ptr
				127	// r2: count
				128	// r3: d
				129	// r4: s0
				130	// r5: s1
				131	// r6: pixel
				132	// r7: pixel
				133	// r8: free
				134	// r9: free
				135	// r10: free
				136	// r11: free
				137	// r12: scratch
				138	// r14: pixel
				139
				140	scanline_t32cb16blend_arm:
				141	stmfd sp!, {r4-r7, lr}
				142
				143	pld [r0]
				144	pld [r1]
				145
				146	// align DST to 32 bits
				147	tst r0, #0x3
				148	beq aligned
				149	subs r2, r2, #1
				150	ldmfdlo sp!, {r4-r7, lr} // return
				151	bxlo lr
				152
				153	last:
				154	ldr r4, [r1], #4
				155	ldrh r3, [r0]
				156	pixel r3, r4, r12, 0
				157	strh r12, [r0], #2
				158
				159	aligned:
				160	subs r2, r2, #2
				161	blo 9f
				162
				163	// The main loop is unrolled twice and processes 4 pixels
				164	8: ldmia r1!, {r4, r5}
				165	// stream the source
				166	pld [r1, #32]
				167	add r0, r0, #4
				168	// it's all zero, skip this pixel
				169	orrs r3, r4, r5
				170	beq 7f
				171
				172	// load the destination
				173	ldr r3, [r0, #-4]
				174	// stream the destination
				175	pld [r0, #32]
				176	pixel r3, r4, r12, 0
				177	pixel r3, r5, r12, 1
				178	// effectively, we're getting write-combining by virtue of the
				179	// cpu's write-back cache.
				180	str r12, [r0, #-4]
				181
				182	// 2nd iterration of the loop, don't stream anything
				183	subs r2, r2, #2
				184	movlt r4, r5
				185	blt 9f
				186	ldmia r1!, {r4, r5}
				187	add r0, r0, #4
				188	orrs r3, r4, r5
				189	beq 7f
				190	ldr r3, [r0, #-4]
				191	pixel r3, r4, r12, 0
				192	pixel r3, r5, r12, 16
				193	str r12, [r0, #-4]
				194
				195
				196	7: subs r2, r2, #2
				197	bhs 8b
				198	mov r4, r5
				199
				200	9: adds r2, r2, #1
				201	ldmfdlo sp!, {r4-r7, lr} // return
				202	bxlo lr
				203	b last