forked from ibphoenix/tomsfastmath
ed1a1e90e9
was not only a x86-64 problem
This commit is contained in:
parent
60cf64919f
commit
7882b0370d
@ -30,7 +30,7 @@ asm( \
|
|||||||
"movl %%edx,%1 \n\t" \
|
"movl %%edx,%1 \n\t" \
|
||||||
:"=g"(_c[LO]), "=r"(cy) \
|
:"=g"(_c[LO]), "=r"(cy) \
|
||||||
:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
|
:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
|
||||||
: "%eax", "%edx", "%cc")
|
: "%eax", "%edx", "cc")
|
||||||
|
|
||||||
#define PROPCARRY \
|
#define PROPCARRY \
|
||||||
asm( \
|
asm( \
|
||||||
@ -39,7 +39,7 @@ asm( \
|
|||||||
"movzbl %%al,%1 \n\t" \
|
"movzbl %%al,%1 \n\t" \
|
||||||
:"=g"(_c[LO]), "=r"(cy) \
|
:"=g"(_c[LO]), "=r"(cy) \
|
||||||
:"0"(_c[LO]), "1"(cy) \
|
:"0"(_c[LO]), "1"(cy) \
|
||||||
: "%eax", "%cc")
|
: "%eax", "cc")
|
||||||
|
|
||||||
/******************************************************************/
|
/******************************************************************/
|
||||||
#elif defined(TFM_X86_64)
|
#elif defined(TFM_X86_64)
|
||||||
@ -280,7 +280,7 @@ asm( \
|
|||||||
"movzbl %%al,%1 \n\t" \
|
"movzbl %%al,%1 \n\t" \
|
||||||
:"=g"(_c[LO]), "=r"(cy) \
|
:"=g"(_c[LO]), "=r"(cy) \
|
||||||
:"0"(_c[LO]), "1"(cy) \
|
:"0"(_c[LO]), "1"(cy) \
|
||||||
: "%eax", "%cc")
|
: "%eax", "cc")
|
||||||
|
|
||||||
/******************************************************************/
|
/******************************************************************/
|
||||||
#elif defined(TFM_ARM)
|
#elif defined(TFM_ARM)
|
||||||
@ -300,7 +300,7 @@ asm( \
|
|||||||
" MOVCC %0,#0 \n\t" \
|
" MOVCC %0,#0 \n\t" \
|
||||||
" UMLAL r0,%0,%3,%4 \n\t" \
|
" UMLAL r0,%0,%3,%4 \n\t" \
|
||||||
" STR r0,%1 \n\t" \
|
" STR r0,%1 \n\t" \
|
||||||
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc");
|
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","cc");
|
||||||
|
|
||||||
#define PROPCARRY \
|
#define PROPCARRY \
|
||||||
asm( \
|
asm( \
|
||||||
@ -309,7 +309,7 @@ asm( \
|
|||||||
" STR r0,%1 \n\t" \
|
" STR r0,%1 \n\t" \
|
||||||
" MOVCS %0,#1 \n\t" \
|
" MOVCS %0,#1 \n\t" \
|
||||||
" MOVCC %0,#0 \n\t" \
|
" MOVCC %0,#0 \n\t" \
|
||||||
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc");
|
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","cc");
|
||||||
|
|
||||||
/******************************************************************/
|
/******************************************************************/
|
||||||
#elif defined(TFM_PPC32)
|
#elif defined(TFM_PPC32)
|
||||||
@ -331,7 +331,7 @@ asm( \
|
|||||||
" addc 16,16,18 \n\t" \
|
" addc 16,16,18 \n\t" \
|
||||||
" addze %0,17 \n\t" \
|
" addze %0,17 \n\t" \
|
||||||
" stw 16,%1 \n\t" \
|
" stw 16,%1 \n\t" \
|
||||||
:"=r"(cy),"=g"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm;
|
:"=r"(cy),"=g"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); ++tmpm;
|
||||||
|
|
||||||
#define PROPCARRY \
|
#define PROPCARRY \
|
||||||
asm( \
|
asm( \
|
||||||
@ -340,7 +340,7 @@ asm( \
|
|||||||
" stw 16,%1 \n\t" \
|
" stw 16,%1 \n\t" \
|
||||||
" xor %0,%0,%0 \n\t" \
|
" xor %0,%0,%0 \n\t" \
|
||||||
" addze %0,%0 \n\t" \
|
" addze %0,%0 \n\t" \
|
||||||
:"=r"(cy),"=g"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc");
|
:"=r"(cy),"=g"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc");
|
||||||
|
|
||||||
/******************************************************************/
|
/******************************************************************/
|
||||||
#elif defined(TFM_PPC64)
|
#elif defined(TFM_PPC64)
|
||||||
@ -362,7 +362,7 @@ asm( \
|
|||||||
" addc r16,r16,r18 \n\t" \
|
" addc r16,r16,r18 \n\t" \
|
||||||
" addze %0,r17 \n\t" \
|
" addze %0,r17 \n\t" \
|
||||||
" sdx r16,0,%1 \n\t" \
|
" sdx r16,0,%1 \n\t" \
|
||||||
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"r16", "r17", "r18","%cc"); ++tmpm;
|
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"r16", "r17", "r18","cc"); ++tmpm;
|
||||||
|
|
||||||
#define PROPCARRY \
|
#define PROPCARRY \
|
||||||
asm( \
|
asm( \
|
||||||
@ -371,7 +371,7 @@ asm( \
|
|||||||
" sdx r16,0,%1 \n\t" \
|
" sdx r16,0,%1 \n\t" \
|
||||||
" xor %0,%0,%0 \n\t" \
|
" xor %0,%0,%0 \n\t" \
|
||||||
" addze %0,%0 \n\t" \
|
" addze %0,%0 \n\t" \
|
||||||
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc");
|
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","cc");
|
||||||
|
|
||||||
/******************************************************************/
|
/******************************************************************/
|
||||||
#elif defined(TFM_AVR32)
|
#elif defined(TFM_AVR32)
|
||||||
@ -401,7 +401,7 @@ asm( \
|
|||||||
" st.w %1,r2 \n\t" \
|
" st.w %1,r2 \n\t" \
|
||||||
" eor %0,%0 \n\t" \
|
" eor %0,%0 \n\t" \
|
||||||
" acr %0 \n\t" \
|
" acr %0 \n\t" \
|
||||||
:"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","%cc");
|
:"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","cc");
|
||||||
|
|
||||||
/******************************************************************/
|
/******************************************************************/
|
||||||
#elif defined(TFM_MIPS)
|
#elif defined(TFM_MIPS)
|
||||||
|
@ -53,7 +53,7 @@ asm( \
|
|||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
"adcl %%edx,%1 \n\t" \
|
"adcl %%edx,%1 \n\t" \
|
||||||
"adcl $0,%2 \n\t" \
|
"adcl $0,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
|
||||||
|
|
||||||
#elif defined(TFM_X86_64)
|
#elif defined(TFM_X86_64)
|
||||||
/* x86-64 optimized */
|
/* x86-64 optimized */
|
||||||
@ -128,7 +128,7 @@ asm( \
|
|||||||
"movd %%mm0,%%eax \n\t" \
|
"movd %%mm0,%%eax \n\t" \
|
||||||
"adcl %%eax,%1 \n\t" \
|
"adcl %%eax,%1 \n\t" \
|
||||||
"adcl $0,%2 \n\t" \
|
"adcl $0,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc");
|
||||||
|
|
||||||
#elif defined(TFM_ARM)
|
#elif defined(TFM_ARM)
|
||||||
/* ARM code */
|
/* ARM code */
|
||||||
@ -155,7 +155,7 @@ asm( \
|
|||||||
" ADDS %0,%0,r0 \n\t" \
|
" ADDS %0,%0,r0 \n\t" \
|
||||||
" ADCS %1,%1,r1 \n\t" \
|
" ADCS %1,%1,r1 \n\t" \
|
||||||
" ADC %2,%2,#0 \n\t" \
|
" ADC %2,%2,#0 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
|
||||||
|
|
||||||
#elif defined(TFM_PPC32)
|
#elif defined(TFM_PPC32)
|
||||||
/* For 32-bit PPC */
|
/* For 32-bit PPC */
|
||||||
|
@ -41,7 +41,7 @@ asm( \
|
|||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
"adcl %%edx,%1 \n\t" \
|
"adcl %%edx,%1 \n\t" \
|
||||||
"adcl $0,%2 \n\t" \
|
"adcl $0,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");
|
||||||
|
|
||||||
#define SQRADD2(i, j) \
|
#define SQRADD2(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
@ -53,7 +53,7 @@ asm( \
|
|||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
"adcl %%edx,%1 \n\t" \
|
"adcl %%edx,%1 \n\t" \
|
||||||
"adcl $0,%2 \n\t" \
|
"adcl $0,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
|
||||||
|
|
||||||
#define SQRADDSC(i, j) \
|
#define SQRADDSC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
@ -62,7 +62,7 @@ asm( \
|
|||||||
"movl %%eax,%0 \n\t" \
|
"movl %%eax,%0 \n\t" \
|
||||||
"movl %%edx,%1 \n\t" \
|
"movl %%edx,%1 \n\t" \
|
||||||
"xorl %2,%2 \n\t" \
|
"xorl %2,%2 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");
|
||||||
|
|
||||||
#define SQRADDAC(i, j) \
|
#define SQRADDAC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
@ -71,7 +71,7 @@ asm( \
|
|||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
"adcl %%edx,%1 \n\t" \
|
"adcl %%edx,%1 \n\t" \
|
||||||
"adcl $0,%2 \n\t" \
|
"adcl $0,%2 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");
|
||||||
|
|
||||||
#define SQRADDDB \
|
#define SQRADDDB \
|
||||||
asm( \
|
asm( \
|
||||||
@ -81,7 +81,7 @@ asm( \
|
|||||||
"addl %6,%0 \n\t" \
|
"addl %6,%0 \n\t" \
|
||||||
"adcl %7,%1 \n\t" \
|
"adcl %7,%1 \n\t" \
|
||||||
"adcl %8,%2 \n\t" \
|
"adcl %8,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
|
||||||
|
|
||||||
#elif defined(TFM_X86_64)
|
#elif defined(TFM_X86_64)
|
||||||
/* x86-64 optimized */
|
/* x86-64 optimized */
|
||||||
@ -181,7 +181,7 @@ asm( \
|
|||||||
"movd %%mm0,%%eax \n\t" \
|
"movd %%mm0,%%eax \n\t" \
|
||||||
"adcl %%eax,%1 \n\t" \
|
"adcl %%eax,%1 \n\t" \
|
||||||
"adcl $0,%2 \n\t" \
|
"adcl $0,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc");
|
||||||
|
|
||||||
#define SQRADD2(i, j) \
|
#define SQRADD2(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
@ -197,7 +197,7 @@ asm( \
|
|||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
"adcl %%edx,%1 \n\t" \
|
"adcl %%edx,%1 \n\t" \
|
||||||
"adcl $0,%2 \n\t" \
|
"adcl $0,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
|
||||||
|
|
||||||
#define SQRADDSC(i, j) \
|
#define SQRADDSC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
@ -221,7 +221,7 @@ asm( \
|
|||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
"adcl %%edx,%1 \n\t" \
|
"adcl %%edx,%1 \n\t" \
|
||||||
"adcl $0,%2 \n\t" \
|
"adcl $0,%2 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc");
|
||||||
|
|
||||||
#define SQRADDDB \
|
#define SQRADDDB \
|
||||||
asm( \
|
asm( \
|
||||||
@ -231,7 +231,7 @@ asm( \
|
|||||||
"addl %6,%0 \n\t" \
|
"addl %6,%0 \n\t" \
|
||||||
"adcl %7,%1 \n\t" \
|
"adcl %7,%1 \n\t" \
|
||||||
"adcl %8,%2 \n\t" \
|
"adcl %8,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
|
||||||
|
|
||||||
#elif defined(TFM_ARM)
|
#elif defined(TFM_ARM)
|
||||||
|
|
||||||
@ -260,7 +260,7 @@ asm( \
|
|||||||
" ADDS %0,%0,r0 \n\t" \
|
" ADDS %0,%0,r0 \n\t" \
|
||||||
" ADCS %1,%1,r1 \n\t" \
|
" ADCS %1,%1,r1 \n\t" \
|
||||||
" ADC %2,%2,#0 \n\t" \
|
" ADC %2,%2,#0 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc");
|
||||||
|
|
||||||
/* for squaring some of the terms are doubled... */
|
/* for squaring some of the terms are doubled... */
|
||||||
#define SQRADD2(i, j) \
|
#define SQRADD2(i, j) \
|
||||||
@ -272,13 +272,13 @@ asm( \
|
|||||||
" ADDS %0,%0,r0 \n\t" \
|
" ADDS %0,%0,r0 \n\t" \
|
||||||
" ADCS %1,%1,r1 \n\t" \
|
" ADCS %1,%1,r1 \n\t" \
|
||||||
" ADC %2,%2,#0 \n\t" \
|
" ADC %2,%2,#0 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
|
||||||
|
|
||||||
#define SQRADDSC(i, j) \
|
#define SQRADDSC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
" UMULL %0,%1,%6,%7 \n\t" \
|
" UMULL %0,%1,%6,%7 \n\t" \
|
||||||
" SUB %2,%2,%2 \n\t" \
|
" SUB %2,%2,%2 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "cc");
|
||||||
|
|
||||||
#define SQRADDAC(i, j) \
|
#define SQRADDAC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
@ -286,7 +286,7 @@ asm( \
|
|||||||
" ADDS %0,%0,r0 \n\t" \
|
" ADDS %0,%0,r0 \n\t" \
|
||||||
" ADCS %1,%1,r1 \n\t" \
|
" ADCS %1,%1,r1 \n\t" \
|
||||||
" ADC %2,%2,#0 \n\t" \
|
" ADC %2,%2,#0 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc");
|
||||||
|
|
||||||
#define SQRADDDB \
|
#define SQRADDDB \
|
||||||
asm( \
|
asm( \
|
||||||
@ -296,7 +296,7 @@ asm( \
|
|||||||
" ADDS %0,%0,%3 \n\t" \
|
" ADDS %0,%0,%3 \n\t" \
|
||||||
" ADCS %1,%1,%4 \n\t" \
|
" ADCS %1,%1,%4 \n\t" \
|
||||||
" ADC %2,%2,%5 \n\t" \
|
" ADC %2,%2,%5 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
|
||||||
|
|
||||||
#elif defined(TFM_PPC32)
|
#elif defined(TFM_PPC32)
|
||||||
|
|
||||||
@ -326,7 +326,7 @@ asm( \
|
|||||||
" mulhwu 16,%6,%6 \n\t" \
|
" mulhwu 16,%6,%6 \n\t" \
|
||||||
" adde %1,%1,16 \n\t" \
|
" adde %1,%1,16 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");
|
||||||
|
|
||||||
/* for squaring some of the terms are doubled... */
|
/* for squaring some of the terms are doubled... */
|
||||||
#define SQRADD2(i, j) \
|
#define SQRADD2(i, j) \
|
||||||
@ -339,14 +339,14 @@ asm( \
|
|||||||
" addc %0,%0,16 \n\t" \
|
" addc %0,%0,16 \n\t" \
|
||||||
" adde %1,%1,17 \n\t" \
|
" adde %1,%1,17 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");
|
||||||
|
|
||||||
#define SQRADDSC(i, j) \
|
#define SQRADDSC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
" mullw %0,%6,%7 \n\t" \
|
" mullw %0,%6,%7 \n\t" \
|
||||||
" mulhwu %1,%6,%7 \n\t" \
|
" mulhwu %1,%6,%7 \n\t" \
|
||||||
" xor %2,%2,%2 \n\t" \
|
" xor %2,%2,%2 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
|
||||||
|
|
||||||
#define SQRADDAC(i, j) \
|
#define SQRADDAC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
@ -355,7 +355,7 @@ asm( \
|
|||||||
" mulhwu 16,%6,%7 \n\t" \
|
" mulhwu 16,%6,%7 \n\t" \
|
||||||
" adde %1,%1,16 \n\t" \
|
" adde %1,%1,16 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");
|
||||||
|
|
||||||
#define SQRADDDB \
|
#define SQRADDDB \
|
||||||
asm( \
|
asm( \
|
||||||
@ -365,7 +365,7 @@ asm( \
|
|||||||
" addc %0,%0,%3 \n\t" \
|
" addc %0,%0,%3 \n\t" \
|
||||||
" adde %1,%1,%4 \n\t" \
|
" adde %1,%1,%4 \n\t" \
|
||||||
" adde %2,%2,%5 \n\t" \
|
" adde %2,%2,%5 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
|
||||||
|
|
||||||
#elif defined(TFM_PPC64)
|
#elif defined(TFM_PPC64)
|
||||||
/* PPC64 */
|
/* PPC64 */
|
||||||
@ -394,7 +394,7 @@ asm( \
|
|||||||
" mulhdu r16,%6,%6 \n\t" \
|
" mulhdu r16,%6,%6 \n\t" \
|
||||||
" adde %1,%1,r16 \n\t" \
|
" adde %1,%1,r16 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","cc");
|
||||||
|
|
||||||
/* for squaring some of the terms are doubled... */
|
/* for squaring some of the terms are doubled... */
|
||||||
#define SQRADD2(i, j) \
|
#define SQRADD2(i, j) \
|
||||||
@ -407,14 +407,14 @@ asm( \
|
|||||||
" addc %0,%0,r16 \n\t" \
|
" addc %0,%0,r16 \n\t" \
|
||||||
" adde %1,%1,r17 \n\t" \
|
" adde %1,%1,r17 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","cc");
|
||||||
|
|
||||||
#define SQRADDSC(i, j) \
|
#define SQRADDSC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
" mulld %0,%6,%7 \n\t" \
|
" mulld %0,%6,%7 \n\t" \
|
||||||
" mulhdu %1,%6,%7 \n\t" \
|
" mulhdu %1,%6,%7 \n\t" \
|
||||||
" xor %2,%2,%2 \n\t" \
|
" xor %2,%2,%2 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
|
||||||
|
|
||||||
#define SQRADDAC(i, j) \
|
#define SQRADDAC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
@ -423,7 +423,7 @@ asm( \
|
|||||||
" mulhdu r16,%6,%7 \n\t" \
|
" mulhdu r16,%6,%7 \n\t" \
|
||||||
" adde %1,%1,r16 \n\t" \
|
" adde %1,%1,r16 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "cc");
|
||||||
|
|
||||||
#define SQRADDDB \
|
#define SQRADDDB \
|
||||||
asm( \
|
asm( \
|
||||||
@ -433,7 +433,7 @@ asm( \
|
|||||||
" addc %0,%0,%3 \n\t" \
|
" addc %0,%0,%3 \n\t" \
|
||||||
" adde %1,%1,%4 \n\t" \
|
" adde %1,%1,%4 \n\t" \
|
||||||
" adde %2,%2,%5 \n\t" \
|
" adde %2,%2,%5 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
|
||||||
|
|
||||||
|
|
||||||
#elif defined(TFM_AVR32)
|
#elif defined(TFM_AVR32)
|
||||||
@ -501,7 +501,7 @@ asm( \
|
|||||||
" add %0,%0,%3 \n\t" \
|
" add %0,%0,%3 \n\t" \
|
||||||
" adc %1,%1,%4 \n\t" \
|
" adc %1,%1,%4 \n\t" \
|
||||||
" adc %2,%2,%5 \n\t" \
|
" adc %2,%2,%5 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
|
||||||
|
|
||||||
#elif defined(TFM_MIPS)
|
#elif defined(TFM_MIPS)
|
||||||
|
|
||||||
@ -571,7 +571,7 @@ asm( \
|
|||||||
" mflo %0 \n\t" \
|
" mflo %0 \n\t" \
|
||||||
" mfhi %1 \n\t" \
|
" mfhi %1 \n\t" \
|
||||||
" xor %2,%2,%2 \n\t" \
|
" xor %2,%2,%2 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
|
||||||
|
|
||||||
#define SQRADDAC(i, j) \
|
#define SQRADDAC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
|
Loading…
Reference in New Issue
Block a user