Blame - libpixelflinger/include/private/pixelflinger/ggl_fixed.h - android_bootable_recovery

blob: 4217a8997a0fea798b844fcd2c45ed30869a2d14 [file] [log] [blame]

bigbiff	673c7ae	2020-12-02 19:44:56 -0500	[diff] [blame]	1	/*
				2	* Copyright (C) 2005 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	#ifndef ANDROID_GGL_FIXED_H
				18	#define ANDROID_GGL_FIXED_H
				19
				20	#include <math.h>
				21	#include <pixelflinger/pixelflinger.h>
				22
				23	// ----------------------------------------------------------------------------
				24
				// Shorthand for the GCC-style function attributes used on the
				// declarations in this header.
				25	#define CONST __attribute__((const))
				26	#define ALWAYS_INLINE __attribute__((always_inline))
				27
				// Fixed-point format constants. FIXED_BITS is the number of fractional
				// bits, so FIXED_ONE (1 << 16) is the raw encoding of 1.0 and FIXED_HALF
				// (1 << 15) that of 0.5. FIXED_EPSILON is a single raw unit. FIXED_MIN and
				// FIXED_MAX are the 0x80000000 / 0x7FFFFFFF bit patterns.
				28	const GGLfixed FIXED_BITS = 16;
				29	const GGLfixed FIXED_EPSILON = 1;
				30	const GGLfixed FIXED_ONE = 1L<<FIXED_BITS;
				31	const GGLfixed FIXED_HALF = 1L<<(FIXED_BITS-1);
				32	const GGLfixed FIXED_MIN = 0x80000000L;
				33	const GGLfixed FIXED_MAX = 0x7FFFFFFFL;
				34
				// Forward declarations that attach ALWAYS_INLINE to the small helpers
				// defined right after this list.
				35	inline GGLfixed gglIntToFixed(GGLfixed i) ALWAYS_INLINE ;
				36	inline GGLfixed gglFixedToIntRound(GGLfixed f) ALWAYS_INLINE ;
				37	inline GGLfixed gglFixedToIntFloor(GGLfixed f) ALWAYS_INLINE ;
				38	inline GGLfixed gglFixedToIntCeil(GGLfixed f) ALWAYS_INLINE ;
				39	inline GGLfixed gglFracx(GGLfixed v) ALWAYS_INLINE ;
				40	inline GGLfixed gglFloorx(GGLfixed v) ALWAYS_INLINE ;
				41	inline GGLfixed gglCeilx(GGLfixed v) ALWAYS_INLINE ;
				42	inline GGLfixed gglCenterx(GGLfixed v) ALWAYS_INLINE ;
				43	inline GGLfixed gglRoundx(GGLfixed v) ALWAYS_INLINE ;
				44
				45	GGLfixed gglIntToFixed(GGLfixed i) {
				46	return i<<FIXED_BITS;
				47	}
				48	GGLfixed gglFixedToIntRound(GGLfixed f) {
				49	return (f + FIXED_HALF)>>FIXED_BITS;
				50	}
				51	GGLfixed gglFixedToIntFloor(GGLfixed f) {
				52	return f>>FIXED_BITS;
				53	}
				54	GGLfixed gglFixedToIntCeil(GGLfixed f) {
				55	return (f + ((1<<FIXED_BITS) - 1))>>FIXED_BITS;
				56	}
				57
				58	GGLfixed gglFracx(GGLfixed v) {
				59	return v & ((1<<FIXED_BITS)-1);
				60	}
				61	GGLfixed gglFloorx(GGLfixed v) {
				62	return gglFixedToIntFloor(v)<<FIXED_BITS;
				63	}
				64	GGLfixed gglCeilx(GGLfixed v) {
				65	return gglFixedToIntCeil(v)<<FIXED_BITS;
				66	}
				67	GGLfixed gglCenterx(GGLfixed v) {
				68	return gglFloorx(v + FIXED_HALF) \| FIXED_HALF;
				69	}
				70	GGLfixed gglRoundx(GGLfixed v) {
				71	return gglFixedToIntRound(v)<<FIXED_BITS;
				72	}
				73
				74	// conversion from (unsigned) int, short, byte to fixed...
				// Each macro rescales its argument with shifts and adds only and wraps the
				// result in a GGLfixed(...) functional cast. The B/S/I variants cast the
				// argument to int32_t first; the UB variant casts to uint32_t.
				// NOTE(review): GGL_UB_TO_X and GGL_US_TO_X expand _x more than once, so
				// arguments with side effects would be evaluated repeatedly.
				75	#define GGL_B_TO_X(_x) GGLfixed( ((int32_t(_x)+1)>>1)<<10 )
				76	#define GGL_S_TO_X(_x) GGLfixed( ((int32_t(_x)+1)>>1)<<2 )
				77	#define GGL_I_TO_X(_x) GGLfixed( ((int32_t(_x)>>1)+1)>>14 )
				78	#define GGL_UB_TO_X(_x) GGLfixed( uint32_t(_x) + \
				79	(uint32_t(_x)<<8) + \
				80	(uint32_t(_x)>>7) )
				81	#define GGL_US_TO_X(_x) GGLfixed( (_x) + ((_x)>>15) )
				82	#define GGL_UI_TO_X(_x) GGLfixed( (((_x)>>1)+1)>>15 )
				83
				84	// ----------------------------------------------------------------------------
				85
				// Out-of-line routines: only declared here, their bodies are not part of
				// this header. Those marked CONST carry __attribute__((const)).
				// NOTE(review): semantics are inferred from the names only (power, square
				// root, reciprocal square root, multiply-then-divide) — confirm against
				// the implementation.
				86	GGLfixed gglPowx(GGLfixed x, GGLfixed y) CONST;
				87	GGLfixed gglSqrtx(GGLfixed a) CONST;
				88	GGLfixed gglSqrtRecipx(GGLfixed x) CONST;
				89	int32_t gglMulDivi(int32_t a, int32_t b, int32_t c);
				90
				// gglRecipQ is the routine the inline gglRecip / gglRecip28 wrappers call
				// with q = 16 and q = 28 respectively.
				91	int32_t gglRecipQNormalized(int32_t x, int* exponent);
				92	int32_t gglRecipQ(GGLfixed x, int q) CONST;
				93
				94	inline GGLfixed gglRecip(GGLfixed x) CONST;
				95	inline GGLfixed gglRecip(GGLfixed x) {
				96	return gglRecipQ(x, 16);
				97	}
				98
				99	inline GGLfixed gglRecip28(GGLfixed x) CONST;
				100	int32_t gglRecip28(GGLfixed x) {
				101	return gglRecipQ(x, 28);
				102	}
				103
				104	// ----------------------------------------------------------------------------
				105
				106	#if defined(__arm__) && !defined(__thumb__)
				107
				108	// inline ARM implementations
				// ARM gglMulx: signed 32x32 -> 64-bit multiply of x and y (smull), then a
				// right shift by `shift` assembled from both halves of the product.
				// "movs ... lsr" leaves the last bit shifted out of the low word in the
				// carry flag and "adc" adds it back in, which rounds the result.
				// The two branches run the same instructions; they differ only in operand
				// constraints: immediates ("I") when `shift` is a compile-time constant,
				// registers ("r") otherwise.
				109	inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;
				110	__attribute__((always_inline)) inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) {
				111	GGLfixed result, t;
				112	if (__builtin_constant_p(shift)) {
				113	asm("smull %[lo], %[hi], %[x], %[y] \n"
				114	"movs %[lo], %[lo], lsr %[rshift] \n"
				115	"adc %[lo], %[lo], %[hi], lsl %[lshift] \n"
				116	: [lo]"=r"(result), [hi]"=r"(t), [x]"=r"(x)
				117	: "%[x]"(x), [y]"r"(y), [lshift] "I"(32-shift), [rshift] "I"(shift)
				118	: "cc"
				119	);
				120	} else {
				121	asm("smull %[lo], %[hi], %[x], %[y] \n"
				122	"movs %[lo], %[lo], lsr %[rshift] \n"
				123	"adc %[lo], %[lo], %[hi], lsl %[lshift] \n"
				124	: [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
				125	: "%[x]"(x), [y]"r"(y), [lshift] "r"(32-shift), [rshift] "r"(shift)
				126	: "cc"
				127	);
				128	}
				129	return result;
				130	}
				131
				// ARM gglMulAddx: 64-bit signed product of x and y (smull), shifted right
				// by `shift` and added to a. The first "add" computes a + (lo >> shift),
				// the second adds hi << (32 - shift). Flags are not used, so unlike
				// gglMulx there is no carry-based rounding step.
				// Both branches run the same instructions; only the shift operand
				// constraints differ ("I" immediates for a constant shift, "r" otherwise).
				132	inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
				133	__attribute__((always_inline)) inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a,
				134	int shift) {
				135	GGLfixed result, t;
				136	if (__builtin_constant_p(shift)) {
				137	asm("smull %[lo], %[hi], %[x], %[y] \n"
				138	"add %[lo], %[a], %[lo], lsr %[rshift] \n"
				139	"add %[lo], %[lo], %[hi], lsl %[lshift] \n"
				140	: [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
				141	: "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
				142	);
				143	} else {
				144	asm("smull %[lo], %[hi], %[x], %[y] \n"
				145	"add %[lo], %[a], %[lo], lsr %[rshift] \n"
				146	"add %[lo], %[lo], %[hi], lsl %[lshift] \n"
				147	: [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
				148	: "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
				149	);
				150	}
				151	return result;
				152	}
				153
				// ARM gglMulSubx: 64-bit signed product of x and y (smull), shifted right
				// by `shift`, minus a. "rsb" computes (lo >> shift) - a and the following
				// "add" brings in hi << (32 - shift). No rounding step is performed.
				// Both branches run the same instructions; only the shift operand
				// constraints differ ("I" immediates for a constant shift, "r" otherwise).
				154	inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
				155	inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) {
				156	GGLfixed result, t;
				157	if (__builtin_constant_p(shift)) {
				158	asm("smull %[lo], %[hi], %[x], %[y] \n"
				159	"rsb %[lo], %[a], %[lo], lsr %[rshift] \n"
				160	"add %[lo], %[lo], %[hi], lsl %[lshift] \n"
				161	: [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
				162	: "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
				163	);
				164	} else {
				165	asm("smull %[lo], %[hi], %[x], %[y] \n"
				166	"rsb %[lo], %[a], %[lo], lsr %[rshift] \n"
				167	"add %[lo], %[lo], %[hi], lsl %[lshift] \n"
				168	: [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
				169	: "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
				170	);
				171	}
				172	return result;
				173	}
				174
				// ARM gglMulii: full 64-bit signed product of two 32-bit integers.
				// smull writes the low and high words straight into the two halves of an
				// anonymous union, which is then read back as a single int64_t.
				175	inline int64_t gglMulii(int32_t x, int32_t y) CONST;
				176	inline int64_t gglMulii(int32_t x, int32_t y)
				177	{
				178	// 64-bits result: r0=low, r1=high
				179	union {
				180	struct {
				181	int32_t lo;
				182	int32_t hi;
				183	} s;
				184	int64_t res;
				185	};
				186	asm("smull %0, %1, %2, %3 \n"
				187	: "=r"(s.lo), "=&r"(s.hi)
				188	: "%r"(x), "r"(y)
				189	:
				190	);
				191	return res;
				192	}
				193	#elif defined(__mips__) && __mips_isa_rev < 6
				194
				195	/* inline MIPS implementations */
				196	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
				197	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
				198	GGLfixed result,tmp,tmp1,tmp2;
				199
				200	if (__builtin_constant_p(shift)) {
				201	if (shift == 0) {
				202	asm ("mult %[a], %[b] \t\n"
				203	"mflo %[res] \t\n"
				204	: [res]"=&r"(result),[tmp]"=&r"(tmp)
				205	: [a]"r"(a),[b]"r"(b)
				206	: "%hi","%lo"
				207	);
				208	} else if (shift == 32)
				209	{
				210	asm ("mult %[a], %[b] \t\n"
				211	"li %[tmp],1\t\n"
				212	"sll %[tmp],%[tmp],0x1f\t\n"
				213	"mflo %[res] \t\n"
				214	"addu %[tmp1],%[tmp],%[res] \t\n"
				215	"sltu %[tmp1],%[tmp1],%[tmp]\t\n" /obit/
				216	"sra %[tmp],%[tmp],0x1f \t\n"
				217	"mfhi %[res] \t\n"
				218	"addu %[res],%[res],%[tmp]\t\n"
				219	"addu %[res],%[res],%[tmp1]\t\n"
				220	: [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
				221	: [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
				222	: "%hi","%lo"
				223	);
				224	} else if ((shift >0) && (shift < 32))
				225	{
				226	asm ("mult %[a], %[b] \t\n"
				227	"li %[tmp],1 \t\n"
				228	"sll %[tmp],%[tmp],%[shiftm1] \t\n"
				229	"mflo %[res] \t\n"
				230	"addu %[tmp1],%[tmp],%[res] \t\n"
				231	"sltu %[tmp1],%[tmp1],%[tmp] \t\n" /obit?/
				232	"addu %[res],%[res],%[tmp] \t\n"
				233	"mfhi %[tmp] \t\n"
				234	"addu %[tmp],%[tmp],%[tmp1] \t\n"
				235	"sll %[tmp],%[tmp],%[lshift] \t\n"
				236	"srl %[res],%[res],%[rshift] \t\n"
				237	"or %[res],%[res],%[tmp] \t\n"
				238	: [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				239	: [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
				240	: "%hi","%lo"
				241	);
				242	} else {
				243	asm ("mult %[a], %[b] \t\n"
				244	"li %[tmp],1 \t\n"
				245	"sll %[tmp],%[tmp],%[shiftm1] \t\n"
				246	"mflo %[res] \t\n"
				247	"addu %[tmp1],%[tmp],%[res] \t\n"
				248	"sltu %[tmp1],%[tmp1],%[tmp] \t\n" /obit?/
				249	"sra %[tmp2],%[tmp],0x1f \t\n"
				250	"addu %[res],%[res],%[tmp] \t\n"
				251	"mfhi %[tmp] \t\n"
				252	"addu %[tmp],%[tmp],%[tmp2] \t\n"
				253	"addu %[tmp],%[tmp],%[tmp1] \t\n" /tmp=hi/
				254	"srl %[tmp2],%[res],%[rshift] \t\n"
				255	"srav %[res], %[tmp],%[rshift]\t\n"
				256	"sll %[tmp],%[tmp],1 \t\n"
				257	"sll %[tmp],%[tmp],%[norbits] \t\n"
				258	"or %[tmp],%[tmp],%[tmp2] \t\n"
				259	"movz %[res],%[tmp],%[bit5] \t\n"
				260	: [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				261	: [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
				262	: "%hi","%lo"
				263	);
				264	}
				265	} else {
				266	asm ("mult %[a], %[b] \t\n"
				267	"li %[tmp],1 \t\n"
				268	"sll %[tmp],%[tmp],%[shiftm1] \t\n"
				269	"mflo %[res] \t\n"
				270	"addu %[tmp1],%[tmp],%[res] \t\n"
				271	"sltu %[tmp1],%[tmp1],%[tmp] \t\n" /obit?/
				272	"sra %[tmp2],%[tmp],0x1f \t\n"
				273	"addu %[res],%[res],%[tmp] \t\n"
				274	"mfhi %[tmp] \t\n"
				275	"addu %[tmp],%[tmp],%[tmp2] \t\n"
				276	"addu %[tmp],%[tmp],%[tmp1] \t\n" /tmp=hi/
				277	"srl %[tmp2],%[res],%[rshift] \t\n"
				278	"srav %[res], %[tmp],%[rshift]\t\n"
				279	"sll %[tmp],%[tmp],1 \t\n"
				280	"sll %[tmp],%[tmp],%[norbits] \t\n"
				281	"or %[tmp],%[tmp],%[tmp2] \t\n"
				282	"movz %[res],%[tmp],%[bit5] \t\n"
				283	: [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				284	: [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
				285	: "%hi","%lo"
				286	);
				287	}
				288
				289	return result;
				290	}
				291
				// MIPS (pre-R6) gglMulAddx: multiplies a by b with `mult`, shifts the
				// 64-bit HI/LO product right by `shift` and adds c. No rounding term is
				// added before the shift.
				//  - constant shift == 0 / == 32: LO or HI is used directly.
				//  - constant 0 < shift < 32: (LO >> shift) | (HI << (32 - shift)).
				//  - other constants and run-time shifts: the same combination is built
				//    with variable shifts (`nor` yields ~shift for the left-shift count),
				//    and `movz` on bit 5 of `shift` selects between it and HI shifted by
				//    `srav`.
				292	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
				293	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
				294	GGLfixed result,t,tmp1,tmp2;
				295
				296	if (__builtin_constant_p(shift)) {
				297	if (shift == 0) {
				298	asm ("mult %[a], %[b] \t\n"
				299	"mflo %[lo] \t\n"
				300	"addu %[lo],%[lo],%[c] \t\n"
				301	: [lo]"=&r"(result)
				302	: [a]"r"(a),[b]"r"(b),[c]"r"(c)
				303	: "%hi","%lo"
				304	);
				305	} else if (shift == 32) {
				306	asm ("mult %[a], %[b] \t\n"
				307	"mfhi %[lo] \t\n"
				308	"addu %[lo],%[lo],%[c] \t\n"
				309	: [lo]"=&r"(result)
				310	: [a]"r"(a),[b]"r"(b),[c]"r"(c)
				311	: "%hi","%lo"
				312	);
				313	} else if ((shift>0) && (shift<32)) {
				314	asm ("mult %[a], %[b] \t\n"
				315	"mflo %[res] \t\n"
				316	"mfhi %[t] \t\n"
				317	"srl %[res],%[res],%[rshift] \t\n"
				318	"sll %[t],%[t],%[lshift] \t\n"
				319	"or %[res],%[res],%[t] \t\n"
				320	"addu %[res],%[res],%[c] \t\n"
				321	: [res]"=&r"(result),[t]"=&r"(t)
				322	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
				323	: "%hi","%lo"
				324	);
				325	} else {
				326	asm ("mult %[a], %[b] \t\n"
				327	"nor %[tmp1],$zero,%[shift]\t\n"
				328	"mflo %[res] \t\n"
				329	"mfhi %[t] \t\n"
				330	"srl %[res],%[res],%[shift] \t\n"
				331	"sll %[tmp2],%[t],1 \t\n"
				332	"sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
				333	"or %[tmp1],%[tmp2],%[res] \t\n"
				334	"srav %[res],%[t],%[shift] \t\n"
				335	"andi %[tmp2],%[shift],0x20\t\n"
				336	"movz %[res],%[tmp1],%[tmp2]\t\n"
				337	"addu %[res],%[res],%[c] \t\n"
				338	: [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				339	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
				340	: "%hi","%lo"
				341	);
				342	}
				343	} else {
				344	asm ("mult %[a], %[b] \t\n"
				345	"nor %[tmp1],$zero,%[shift]\t\n"
				346	"mflo %[res] \t\n"
				347	"mfhi %[t] \t\n"
				348	"srl %[res],%[res],%[shift] \t\n"
				349	"sll %[tmp2],%[t],1 \t\n"
				350	"sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
				351	"or %[tmp1],%[tmp2],%[res] \t\n"
				352	"srav %[res],%[t],%[shift] \t\n"
				353	"andi %[tmp2],%[shift],0x20\t\n"
				354	"movz %[res],%[tmp1],%[tmp2]\t\n"
				355	"addu %[res],%[res],%[c] \t\n"
				356	: [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				357	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
				358	: "%hi","%lo"
				359	);
				360	}
				361	return result;
				362	}
				363
				// MIPS (pre-R6) gglMulSubx: multiplies a by b with `mult`, shifts the
				// 64-bit HI/LO product right by `shift` and subtracts c. The instruction
				// sequences mirror gglMulAddx, with the final `addu` replaced by `subu`.
				//  - constant shift == 0 / == 32: LO or HI is used directly.
				//  - constant 0 < shift < 32: (LO >> shift) | (HI << (32 - shift)).
				//  - other constants and run-time shifts: variable-shift combination,
				//    with `movz` on bit 5 of `shift` selecting between it and HI shifted
				//    by `srav`.
				364	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
				365	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
				366	GGLfixed result,t,tmp1,tmp2;
				367
				368	if (__builtin_constant_p(shift)) {
				369	if (shift == 0) {
				370	asm ("mult %[a], %[b] \t\n"
				371	"mflo %[lo] \t\n"
				372	"subu %[lo],%[lo],%[c] \t\n"
				373	: [lo]"=&r"(result)
				374	: [a]"r"(a),[b]"r"(b),[c]"r"(c)
				375	: "%hi","%lo"
				376	);
				377	} else if (shift == 32) {
				378	asm ("mult %[a], %[b] \t\n"
				379	"mfhi %[lo] \t\n"
				380	"subu %[lo],%[lo],%[c] \t\n"
				381	: [lo]"=&r"(result)
				382	: [a]"r"(a),[b]"r"(b),[c]"r"(c)
				383	: "%hi","%lo"
				384	);
				385	} else if ((shift>0) && (shift<32)) {
				386	asm ("mult %[a], %[b] \t\n"
				387	"mflo %[res] \t\n"
				388	"mfhi %[t] \t\n"
				389	"srl %[res],%[res],%[rshift] \t\n"
				390	"sll %[t],%[t],%[lshift] \t\n"
				391	"or %[res],%[res],%[t] \t\n"
				392	"subu %[res],%[res],%[c] \t\n"
				393	: [res]"=&r"(result),[t]"=&r"(t)
				394	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
				395	: "%hi","%lo"
				396	);
				397	} else {
				398	asm ("mult %[a], %[b] \t\n"
				399	"nor %[tmp1],$zero,%[shift]\t\n"
				400	"mflo %[res] \t\n"
				401	"mfhi %[t] \t\n"
				402	"srl %[res],%[res],%[shift] \t\n"
				403	"sll %[tmp2],%[t],1 \t\n"
				404	"sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
				405	"or %[tmp1],%[tmp2],%[res] \t\n"
				406	"srav %[res],%[t],%[shift] \t\n"
				407	"andi %[tmp2],%[shift],0x20\t\n"
				408	"movz %[res],%[tmp1],%[tmp2]\t\n"
				409	"subu %[res],%[res],%[c] \t\n"
				410	: [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				411	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
				412	: "%hi","%lo"
				413	);
				414	}
				415	} else {
				416	asm ("mult %[a], %[b] \t\n"
				417	"nor %[tmp1],$zero,%[shift]\t\n"
				418	"mflo %[res] \t\n"
				419	"mfhi %[t] \t\n"
				420	"srl %[res],%[res],%[shift] \t\n"
				421	"sll %[tmp2],%[t],1 \t\n"
				422	"sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
				423	"or %[tmp1],%[tmp2],%[res] \t\n"
				424	"srav %[res],%[t],%[shift] \t\n"
				425	"andi %[tmp2],%[shift],0x20\t\n"
				426	"movz %[res],%[tmp1],%[tmp2]\t\n"
				427	"subu %[res],%[res],%[c] \t\n"
				428	: [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				429	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
				430	: "%hi","%lo"
				431	);
				432	}
				433	return result;
				434	}
				435
				// MIPS (pre-R6) gglMulii: full 64-bit signed product of two 32-bit
				// integers. `mult` leaves the product in HI/LO; mfhi/mflo copy the halves
				// into a union whose lo/hi field order follows the target endianness
				// (__MIPSEL__ / __MIPSEB__), and the union is read back as one int64_t.
				// NOTE(review): if neither endianness macro is defined the struct has no
				// members and the asm operands will not compile.
				436	inline int64_t gglMulii(int32_t x, int32_t y) CONST;
				437	inline int64_t gglMulii(int32_t x, int32_t y) {
				438	union {
				439	struct {
				440	#if defined(__MIPSEL__)
				441	int32_t lo;
				442	int32_t hi;
				443	#elif defined(__MIPSEB__)
				444	int32_t hi;
				445	int32_t lo;
				446	#endif
				447	} s;
				448	int64_t res;
				449	}u;
				450	asm("mult %2, %3 \t\n"
				451	"mfhi %1 \t\n"
				452	"mflo %0 \t\n"
				453	: "=r"(u.s.lo), "=&r"(u.s.hi)
				454	: "%r"(x), "r"(y)
				455	: "%hi","%lo"
				456	);
				457	return u.res;
				458	}
				459
				460	#elif defined(__aarch64__)
				461
				462	// inline AArch64 implementations
				463
				// AArch64 gglMulx: builds a rounding term by loading 1, shifting it left
				// by `shift` and then right by one; `smaddl` computes the 64-bit signed
				// product x*y plus that term, and `lsr` shifts the sum right by `shift`.
				// NOTE(review): `round` and `result` are GGLfixed variables but are
				// addressed as 64-bit %x registers inside the asm — confirm this is
				// intended.
				464	inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;
				465	inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift)
				466	{
				467	GGLfixed result;
				468	GGLfixed round;
				469
				470	asm("mov %x[round], #1 \n"
				471	"lsl %x[round], %x[round], %x[shift] \n"
				472	"lsr %x[round], %x[round], #1 \n"
				473	"smaddl %x[result], %w[x], %w[y],%x[round] \n"
				474	"lsr %x[result], %x[result], %x[shift] \n"
				475	: [round]"=&r"(round), [result]"=&r"(result) \
				476	: [x]"r"(x), [y]"r"(y), [shift] "r"(shift) \
				477	:
				478	);
				479	return result;
				480	}
				// AArch64 gglMulAddx: `smull` forms the 64-bit signed product x*y, `lsr`
				// shifts it right by `shift`, and a 32-bit `add` adds a. No rounding term
				// is added.
				481	inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
				482	inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
				483	{
				484	GGLfixed result;
				485	asm("smull %x[result], %w[x], %w[y] \n"
				486	"lsr %x[result], %x[result], %x[shift] \n"
				487	"add %w[result], %w[result], %w[a] \n"
				488	: [result]"=&r"(result) \
				489	: [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \
				490	:
				491	);
				492	return result;
				493	}
				494
				// AArch64 gglMulSubx: `smull` forms the 64-bit signed product x*y, `lsr`
				// shifts it right by `shift`, and a 32-bit `sub` subtracts a. No rounding
				// term is added.
				495	inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
				496	inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
				497	{
				498
				499	GGLfixed result;
				500
				501	asm("smull %x[result], %w[x], %w[y] \n"
				502	"lsr %x[result], %x[result], %x[shift] \n"
				503	"sub %w[result], %w[result], %w[a] \n"
				504	: [result]"=&r"(result) \
				505	: [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \
				506	:
				507	);
				508	return result;
				509	}
				// AArch64 gglMulii: a single `smull` multiplies the two 32-bit inputs and
				// writes the 64-bit signed product directly into the int64_t result.
				510	inline int64_t gglMulii(int32_t x, int32_t y) CONST;
				511	inline int64_t gglMulii(int32_t x, int32_t y)
				512	{
				513	int64_t res;
				514	asm("smull %x0, %w1, %w2 \n"
				515	: "=r"(res)
				516	: "%r"(x), "r"(y)
				517	:
				518	);
				519	return res;
				520	}
				521
				522	#elif defined(__mips__) && __mips_isa_rev == 6
				523
				524	/* inline MIPS implementations */
				525	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
				526	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
				527	GGLfixed result,tmp,tmp1,tmp2;
				528
				529	if (__builtin_constant_p(shift)) {
				530	if (shift == 0) {
				531	asm ("mul %[res], %[a], %[b] \t\n"
				532	: [res]"=&r"(result)
				533	: [a]"r"(a),[b]"r"(b)
				534	);
				535	} else if (shift == 32)
				536	{
				537	asm ("mul %[res], %[a], %[b] \t\n"
				538	"li %[tmp],1\t\n"
				539	"sll %[tmp],%[tmp],0x1f\t\n"
				540	"addu %[tmp1],%[tmp],%[res] \t\n"
				541	"muh %[res], %[a], %[b] \t\n"
				542	"sltu %[tmp1],%[tmp1],%[tmp]\t\n" /obit/
				543	"sra %[tmp],%[tmp],0x1f \t\n"
				544	"addu %[res],%[res],%[tmp]\t\n"
				545	"addu %[res],%[res],%[tmp1]\t\n"
				546	: [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
				547	: [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
				548	);
				549	} else if ((shift >0) && (shift < 32))
				550	{
				551	asm ("mul %[res], %[a], %[b] \t\n"
				552	"li %[tmp],1 \t\n"
				553	"sll %[tmp],%[tmp],%[shiftm1] \t\n"
				554	"addu %[tmp1],%[tmp],%[res] \t\n"
				555	"sltu %[tmp1],%[tmp1],%[tmp] \t\n" /obit?/
				556	"addu %[res],%[res],%[tmp] \t\n"
				557	"muh %[tmp], %[a], %[b] \t\n"
				558	"addu %[tmp],%[tmp],%[tmp1] \t\n"
				559	"sll %[tmp],%[tmp],%[lshift] \t\n"
				560	"srl %[res],%[res],%[rshift] \t\n"
				561	"or %[res],%[res],%[tmp] \t\n"
				562	: [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				563	: [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
				564	);
				565	} else {
				566	asm ("mul %[res], %[a], %[b] \t\n"
				567	"li %[tmp],1 \t\n"
				568	"sll %[tmp],%[tmp],%[shiftm1] \t\n"
				569	"addu %[tmp1],%[tmp],%[res] \t\n"
				570	"sltu %[tmp1],%[tmp1],%[tmp] \t\n" /obit?/
				571	"sra %[tmp2],%[tmp],0x1f \t\n"
				572	"addu %[res],%[res],%[tmp] \t\n"
				573	"muh %[tmp], %[a], %[b] \t\n"
				574	"addu %[tmp],%[tmp],%[tmp2] \t\n"
				575	"addu %[tmp],%[tmp],%[tmp1] \t\n" /tmp=hi/
				576	"srl %[tmp2],%[res],%[rshift] \t\n"
				577	"srav %[res], %[tmp],%[rshift]\t\n"
				578	"sll %[tmp],%[tmp],1 \t\n"
				579	"sll %[tmp],%[tmp],%[norbits] \t\n"
				580	"or %[tmp],%[tmp],%[tmp2] \t\n"
				581	"seleqz %[tmp],%[tmp],%[bit5] \t\n"
				582	"selnez %[res],%[res],%[bit5] \t\n"
				583	"or %[res],%[res],%[tmp] \t\n"
				584	: [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				585	: [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
				586	);
				587	}
				588	} else {
				589	asm ("mul %[res], %[a], %[b] \t\n"
				590	"li %[tmp],1 \t\n"
				591	"sll %[tmp],%[tmp],%[shiftm1] \t\n"
				592	"addu %[tmp1],%[tmp],%[res] \t\n"
				593	"sltu %[tmp1],%[tmp1],%[tmp] \t\n" /obit?/
				594	"sra %[tmp2],%[tmp],0x1f \t\n"
				595	"addu %[res],%[res],%[tmp] \t\n"
				596	"muh %[tmp], %[a], %[b] \t\n"
				597	"addu %[tmp],%[tmp],%[tmp2] \t\n"
				598	"addu %[tmp],%[tmp],%[tmp1] \t\n" /tmp=hi/
				599	"srl %[tmp2],%[res],%[rshift] \t\n"
				600	"srav %[res], %[tmp],%[rshift]\t\n"
				601	"sll %[tmp],%[tmp],1 \t\n"
				602	"sll %[tmp],%[tmp],%[norbits] \t\n"
				603	"or %[tmp],%[tmp],%[tmp2] \t\n"
				604	"seleqz %[tmp],%[tmp],%[bit5] \t\n"
				605	"selnez %[res],%[res],%[bit5] \t\n"
				606	"or %[res],%[res],%[tmp] \t\n"
				607	: [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				608	: [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
				609	);
				610	}
				611	return result;
				612	}
				613
				// MIPS R6 gglMulAddx: forms the low/high product words of a*b with
				// `mul` / `muh`, shifts the 64-bit value right by `shift` and adds c.
				// No rounding term is added.
				//  - constant shift == 0 / == 32: the low or high word is used directly.
				//  - constant 0 < shift < 32: (low >> shift) | (high << (32 - shift)).
				//  - other constants and run-time shifts: the same combination is built
				//    with variable shifts (`nor` yields ~shift for the left-shift count);
				//    seleqz/selnez on bit 5 of `shift` choose between it and the high
				//    word shifted by `srav`.
				614	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
				615	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
				616	GGLfixed result,t,tmp1,tmp2;
				617
				618	if (__builtin_constant_p(shift)) {
				619	if (shift == 0) {
				620	asm ("mul %[lo], %[a], %[b] \t\n"
				621	"addu %[lo],%[lo],%[c] \t\n"
				622	: [lo]"=&r"(result)
				623	: [a]"r"(a),[b]"r"(b),[c]"r"(c)
				624	);
				625	} else if (shift == 32) {
				626	asm ("muh %[lo], %[a], %[b] \t\n"
				627	"addu %[lo],%[lo],%[c] \t\n"
				628	: [lo]"=&r"(result)
				629	: [a]"r"(a),[b]"r"(b),[c]"r"(c)
				630	);
				631	} else if ((shift>0) && (shift<32)) {
				632	asm ("mul %[res], %[a], %[b] \t\n"
				633	"muh %[t], %[a], %[b] \t\n"
				634	"srl %[res],%[res],%[rshift] \t\n"
				635	"sll %[t],%[t],%[lshift] \t\n"
				636	"or %[res],%[res],%[t] \t\n"
				637	"addu %[res],%[res],%[c] \t\n"
				638	: [res]"=&r"(result),[t]"=&r"(t)
				639	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
				640	);
				641	} else {
				642	asm ("mul %[res], %[a], %[b] \t\n"
				643	"muh %[t], %[a], %[b] \t\n"
				644	"nor %[tmp1],$zero,%[shift]\t\n"
				645	"srl %[res],%[res],%[shift] \t\n"
				646	"sll %[tmp2],%[t],1 \t\n"
				647	"sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
				648	"or %[tmp1],%[tmp2],%[res] \t\n"
				649	"srav %[res],%[t],%[shift] \t\n"
				650	"andi %[tmp2],%[shift],0x20\t\n"
				651	"seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
				652	"selnez %[res],%[res],%[tmp2]\t\n"
				653	"or %[res],%[res],%[tmp1]\t\n"
				654	"addu %[res],%[res],%[c] \t\n"
				655	: [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				656	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
				657	);
				658	}
				659	} else {
				660	asm ("mul %[res], %[a], %[b] \t\n"
				661	"muh %[t], %[a], %[b] \t\n"
				662	"nor %[tmp1],$zero,%[shift]\t\n"
				663	"srl %[res],%[res],%[shift] \t\n"
				664	"sll %[tmp2],%[t],1 \t\n"
				665	"sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
				666	"or %[tmp1],%[tmp2],%[res] \t\n"
				667	"srav %[res],%[t],%[shift] \t\n"
				668	"andi %[tmp2],%[shift],0x20\t\n"
				669	"seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
				670	"selnez %[res],%[res],%[tmp2]\t\n"
				671	"or %[res],%[res],%[tmp1]\t\n"
				672	"addu %[res],%[res],%[c] \t\n"
				673	: [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				674	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
				675	);
				676	}
				677	return result;
				678	}
				679
				// MIPS R6 gglMulSubx: forms the low/high product words of a*b with
				// `mul` / `muh`, shifts the 64-bit value right by `shift` and subtracts
				// c. The instruction sequences mirror gglMulAddx, with the final `addu`
				// replaced by `subu`.
				//  - constant shift == 0 / == 32: the low or high word is used directly.
				//  - constant 0 < shift < 32: (low >> shift) | (high << (32 - shift)).
				//  - other constants and run-time shifts: variable-shift combination,
				//    with seleqz/selnez on bit 5 of `shift` choosing between it and the
				//    high word shifted by `srav`.
				680	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
				681	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
				682	GGLfixed result,t,tmp1,tmp2;
				683
				684	if (__builtin_constant_p(shift)) {
				685	if (shift == 0) {
				686	asm ("mul %[lo], %[a], %[b] \t\n"
				687	"subu %[lo],%[lo],%[c] \t\n"
				688	: [lo]"=&r"(result)
				689	: [a]"r"(a),[b]"r"(b),[c]"r"(c)
				690	);
				691	} else if (shift == 32) {
				692	asm ("muh %[lo], %[a], %[b] \t\n"
				693	"subu %[lo],%[lo],%[c] \t\n"
				694	: [lo]"=&r"(result)
				695	: [a]"r"(a),[b]"r"(b),[c]"r"(c)
				696	);
				697	} else if ((shift>0) && (shift<32)) {
				698	asm ("mul %[res], %[a], %[b] \t\n"
				699	"muh %[t], %[a], %[b] \t\n"
				700	"srl %[res],%[res],%[rshift] \t\n"
				701	"sll %[t],%[t],%[lshift] \t\n"
				702	"or %[res],%[res],%[t] \t\n"
				703	"subu %[res],%[res],%[c] \t\n"
				704	: [res]"=&r"(result),[t]"=&r"(t)
				705	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
				706	);
				707	} else {
				708	asm ("mul %[res], %[a], %[b] \t\n"
				709	"muh %[t], %[a], %[b] \t\n"
				710	"nor %[tmp1],$zero,%[shift]\t\n"
				711	"srl %[res],%[res],%[shift] \t\n"
				712	"sll %[tmp2],%[t],1 \t\n"
				713	"sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
				714	"or %[tmp1],%[tmp2],%[res] \t\n"
				715	"srav %[res],%[t],%[shift] \t\n"
				716	"andi %[tmp2],%[shift],0x20\t\n"
				717	"seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
				718	"selnez %[res],%[res],%[tmp2]\t\n"
				719	"or %[res],%[res],%[tmp1]\t\n"
				720	"subu %[res],%[res],%[c] \t\n"
				721	: [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				722	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
				723	);
				724	}
				725	} else {
				726	asm ("mul %[res], %[a], %[b] \t\n"
				727	"muh %[t], %[a], %[b] \t\n"
				728	"nor %[tmp1],$zero,%[shift]\t\n"
				729	"srl %[res],%[res],%[shift] \t\n"
				730	"sll %[tmp2],%[t],1 \t\n"
				731	"sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
				732	"or %[tmp1],%[tmp2],%[res] \t\n"
				733	"srav %[res],%[t],%[shift] \t\n"
				734	"andi %[tmp2],%[shift],0x20\t\n"
				735	"seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
				736	"selnez %[res],%[res],%[tmp2]\t\n"
				737	"or %[res],%[res],%[tmp1]\t\n"
				738	"subu %[res],%[res],%[c] \t\n"
				739	: [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				740	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
				741	);
				742	}
				743	return result;
				744	}
				745
				// MIPS R6 gglMulii: full 64-bit signed product of two 32-bit integers.
				// `mul` and `muh` write the low and high words into a union whose lo/hi
				// field order follows the target endianness (__MIPSEL__ / __MIPSEB__);
				// the union is then read back as one int64_t.
				// NOTE(review): if neither endianness macro is defined the struct has no
				// members and the asm operands will not compile.
				746	inline int64_t gglMulii(int32_t x, int32_t y) CONST;
				747	inline int64_t gglMulii(int32_t x, int32_t y) {
				748	union {
				749	struct {
				750	#if defined(__MIPSEL__)
				751	int32_t lo;
				752	int32_t hi;
				753	#elif defined(__MIPSEB__)
				754	int32_t hi;
				755	int32_t lo;
				756	#endif
				757	} s;
				758	int64_t res;
				759	}u;
				760	asm("mul %0, %2, %3 \t\n"
				761	"muh %1, %2, %3 \t\n"
				762	: "=r"(u.s.lo), "=&r"(u.s.hi)
				763	: "%r"(x), "r"(y)
				764	);
				765	return u.res;
				766	}
				767
				768	#else // ----------------------------------------------------------------------
				769
				770	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
				771	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
				772	return GGLfixed((int64_t(a)*b + (1<<(shift-1)))>>shift);
				773	}
				774	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
				775	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
				776	return GGLfixed((int64_t(a)*b)>>shift) + c;
				777	}
				778	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
				779	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
				780	return GGLfixed((int64_t(a)*b)>>shift) - c;
				781	}
				782	inline int64_t gglMulii(int32_t a, int32_t b) CONST;
				783	inline int64_t gglMulii(int32_t a, int32_t b) {
				784	return int64_t(a)*b;
				785	}
				786
				787	#endif
				788
				789	// ------------------------------------------------------------------------
				790
				791	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) CONST;
				792	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) {
				793	return gglMulx(a, b, 16);
				794	}
				795	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
				796	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) {
				797	return gglMulAddx(a, b, c, 16);
				798	}
				799	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
				800	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) {
				801	return gglMulSubx(a, b, c, 16);
				802	}
				803
				804	// ------------------------------------------------------------------------
				805
				806	inline int32_t gglClz(int32_t x) CONST;
				807	inline int32_t gglClz(int32_t x)
				808	{
				809	#if (defined(__arm__) && !defined(__thumb__)) \|\| defined(__mips__) \|\| defined(__aarch64__)
				810	return __builtin_clz(x);
				811	#else
				812	if (!x) return 32;
				813	int32_t exp = 31;
				814	if (x & 0xFFFF0000) { exp -=16; x >>= 16; }
				815	if (x & 0x0000ff00) { exp -= 8; x >>= 8; }
				816	if (x & 0x000000f0) { exp -= 4; x >>= 4; }
				817	if (x & 0x0000000c) { exp -= 2; x >>= 2; }
				818	if (x & 0x00000002) { exp -= 1; }
				819	return exp;
				820	#endif
				821	}
				822
				823	// ------------------------------------------------------------------------
				824
				825	int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) CONST;
				826
				827	inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) CONST;
				828	inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) {
				829	return gglDivQ(n, d, 16);
				830	}
				831
				832	inline int32_t gglDivx(GGLfixed n, GGLfixed d) CONST;
				833	inline int32_t gglDivx(GGLfixed n, GGLfixed d) {
				834	return gglDivQ(n, d, 16);
				835	}
				836
				837	// ------------------------------------------------------------------------
				838
				839	inline GGLfixed gglRecipFast(GGLfixed x) CONST;
				840	inline GGLfixed gglRecipFast(GGLfixed x)
				841	{
				842	// This is a really bad approximation of 1/x, but it's also
				843	// very fast. x must be strictly positive.
				844	// if x between [0.5, 1[ , then 1/x = 3-2*x
				845	// (we use 2.30 fixed-point)
				846	const int32_t lz = gglClz(x);
				847	return (0xC0000000 - (x << (lz - 1))) >> (30-lz);
				848	}
				849
				850	// ------------------------------------------------------------------------
				851
				852	inline GGLfixed gglClampx(GGLfixed c) CONST;
				853	inline GGLfixed gglClampx(GGLfixed c)
				854	{
				855	#if defined(__thumb__)
				856	// clamp without branches
				857	c &= ~(c>>31); c = FIXED_ONE - c;
				858	c &= ~(c>>31); c = FIXED_ONE - c;
				859	#else
				860	#if defined(__arm__)
				861	// I don't know why gcc thinks its smarter than me! The code below
				862	// clamps to zero in one instruction, but gcc won't generate it and
				863	// replace it by a cmp + movlt (it's quite amazing actually).
				864	asm("bic %0, %1, %1, asr #31\n" : "=r"(c) : "r"(c));
				865	#elif defined(__aarch64__)
				866	asm("bic %w0, %w1, %w1, asr #31\n" : "=r"(c) : "r"(c));
				867	#else
				868	c &= ~(c>>31);
				869	#endif
				870	if (c>FIXED_ONE)
				871	c = FIXED_ONE;
				872	#endif
				873	return c;
				874	}
				875
				876	// ------------------------------------------------------------------------
				877
				878	#endif // ANDROID_GGL_FIXED_H