added tomsfastmath-0.07

This commit is contained in:
Tom St Denis 2005-11-18 05:16:25 +00:00 committed by Steffen Jaeckel
parent 091b337fe8
commit 9ce1fe4656
15 changed files with 190 additions and 174 deletions

View File

@ -1,3 +1,8 @@
November 18th, 2005
0.07 -- Fixes to fp_mul and fp_sqr to clean up the handling of the defines, fix to tfm.h to also clear up the prototypes.
-- Updates to build and run on an IBM PPC 405 [using GCC 3.4.4]
-- Made the "make" command renamable in the build system
October 31st, 2005 October 31st, 2005
0.06 -- fixed fp_mul() and fp_sqr() to trim digits when overflows would occur. Produces numerically imprecise results 0.06 -- fixed fp_mul() and fp_sqr() to trim digits when overflows would occur. Produces numerically imprecise results
(e.g. the lower FP_SIZE digits) but shouldn't segfault at least ;-) (e.g. the lower FP_SIZE digits) but shouldn't segfault at least ;-)

View File

@ -31,9 +31,9 @@ static ulong64 TIMFUNC (void)
__asm__ __volatile__ ("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::"m"(a):"%eax","%edx"); __asm__ __volatile__ ("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::"m"(a):"%eax","%edx");
return a; return a;
#elif defined(TFM_PPC32) #elif defined(TFM_PPC32)
unsigned long a; unsigned long a, b;
__asm__ __volatile__ ("mftb %0":"=r"(a)); __asm__ __volatile__ ("mftbu %1 \nmftb %0\n":"=r"(a), "=r"(b));
return a; return (((ulong64)b) << 32ULL) | ((ulong64)a);
#else /* gcc-IA64 version */ #else /* gcc-IA64 version */
unsigned long result; unsigned long result;
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory"); __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");

Binary file not shown.

View File

@ -259,24 +259,24 @@ asm( \
#define INNERMUL \ #define INNERMUL \
asm( \ asm( \
" mullw r16,%3,%4 \n\t" \ " mullw 16,%3,%4 \n\t" \
" mulhwu r17,%3,%4 \n\t" \ " mulhwu 17,%3,%4 \n\t" \
" addc r16,r16,%0 \n\t" \ " addc 16,16,%0 \n\t" \
" addze r17,r17 \n\t" \ " addze 17,17 \n\t" \
" lwz r18,%1 \n\t" \ " lwz 18,%1 \n\t" \
" addc r16,r16,r18 \n\t" \ " addc 16,16,18 \n\t" \
" addze %0,r17 \n\t" \ " addze %0,17 \n\t" \
" stw r16,%1 \n\t" \ " stw 16,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r16", "r17", "r18","%cc"); :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm;
#define PROPCARRY \ #define PROPCARRY \
asm( \ asm( \
" lwz r16,%1 \n\t" \ " lwz 16,%1 \n\t" \
" addc r16,r16,%0 \n\t" \ " addc 16,16,%0 \n\t" \
" stw r16,%1 \n\t" \ " stw 16,%1 \n\t" \
" xor %0,%0,%0 \n\t" \ " xor %0,%0,%0 \n\t" \
" addze %0,%0 \n\t" \ " addze %0,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc"); :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc");
/******************************************************************/ /******************************************************************/
#else #else

View File

@ -34,28 +34,28 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
#ifdef TFM_SMALL_SET #ifdef TFM_SMALL_SET
if (y <= 16) { if (y <= 16) {
fp_mul_comba_small(A,B,C); fp_mul_comba_small(A,B,C);
#elif defined(TFM_HUGE) return;
if (0) { 1; }
#endif #endif
#if defined(TFM_MUL32) #if defined(TFM_MUL32)
} else if (y <= 32) { if (yy >= 24 && y <= 32) {
fp_mul_comba32(A,B,C); fp_mul_comba32(A,B,C);
return;
}
#endif #endif
#if defined(TFM_MUL48) #if defined(TFM_MUL48)
} else if (y <= 48) { if (yy >= 40 && y <= 48) {
fp_mul_comba48(A,B,C); fp_mul_comba48(A,B,C);
return;
}
#endif #endif
#if defined(TFM_MUL64) #if defined(TFM_MUL64)
} else if (y <= 64) { if (yy >= 56 && y <= 64) {
fp_mul_comba64(A,B,C); fp_mul_comba64(A,B,C);
#endif return;
#if !defined(TFM_HUGE) && !defined(TFM_SMALL_SET) }
{
#else
} else {
#endif #endif
fp_mul_comba(A,B,C); fp_mul_comba(A,B,C);
}
} else { } else {
/* do the karatsuba action /* do the karatsuba action

View File

@ -179,12 +179,12 @@ asm( \
/* untested: will mulhwu change the flags? Docs say no */ /* untested: will mulhwu change the flags? Docs say no */
#define MULADD(i, j) \ #define MULADD(i, j) \
asm( \ asm( \
" mullw r16,%6,%7 \n\t" \ " mullw 16,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \ " addc %0,%0,16 \n\t" \
" mulhwu r16,%6,%7 \n\t" \ " mulhwu 16,%6,%7 \n\t" \
" adde %1,%1,r16 \n\t" \ " adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16"); :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
#else #else
/* ISO C code */ /* ISO C code */

View File

@ -27,29 +27,28 @@ void fp_sqr(fp_int *A, fp_int *B)
#if defined(TFM_SMALL_SET) #if defined(TFM_SMALL_SET)
if (y <= 16) { if (y <= 16) {
fp_sqr_comba_small(A,B); fp_sqr_comba_small(A,B);
#elif defined(TFM_HUGE) return;
if (0) { 1; }
#endif #endif
#if defined(TFM_SQR32) #if defined(TFM_SQR32)
} else if (y <= 32) { if (y <= 32) {
fp_sqr_comba32(A,B); fp_sqr_comba32(A,B);
return;
}
#endif #endif
#if defined(TFM_SQR48) #if defined(TFM_SQR48)
} else if (y <= 48) { if (y <= 48) {
fp_sqr_comba48(A,B); fp_sqr_comba48(A,B);
return;
}
#endif #endif
#if defined(TFM_SQR64) #if defined(TFM_SQR64)
} else if (y <= 64) { if (y <= 64) {
fp_sqr_comba64(A,B); fp_sqr_comba64(A,B);
#endif return;
#if !defined(TFM_SMALL_SET) && !defined(TFM_HUGE) }
{
#else
} else {
#endif #endif
fp_sqr_comba(A, B); fp_sqr_comba(A, B);
}
} else { } else {
/* do the karatsuba action /* do the karatsuba action

View File

@ -303,7 +303,7 @@ asm( \
/* PPC32 */ /* PPC32 */
#define COMBA_START \ #define COMBA_START
#define CLEAR_CARRY \ #define CLEAR_CARRY \
c0 = c1 = c2 = 0; c0 = c1 = c2 = 0;
@ -317,30 +317,30 @@ asm( \
#define CARRY_FORWARD \ #define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0); do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI \ #define COMBA_FINI
/* multiplies point i and j, updates carry "c1" and digit c2 */ /* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \ #define SQRADD(i, j) \
asm( \ asm( \
" mullw r16,%6,%6 \n\t" \ " mullw 16,%6,%6 \n\t" \
" addc %0,%0,r16 \n\t" \ " addc %0,%0,16 \n\t" \
" mulhwu r16,%6,%6 \n\t" \ " mulhwu 16,%6,%6 \n\t" \
" adde %1,%1,r16 \n\t" \ " adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","%cc"); :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc");
/* for squaring some of the terms are doubled... */ /* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \ #define SQRADD2(i, j) \
asm( \ asm( \
" mullw r16,%6,%7 \n\t" \ " mullw 16,%6,%7 \n\t" \
" mulhwu r17,%6,%7 \n\t" \ " mulhwu 17,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \ " addc %0,%0,16 \n\t" \
" adde %1,%1,r17 \n\t" \ " adde %1,%1,17 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
" addc %0,%0,r16 \n\t" \ " addc %0,%0,16 \n\t" \
" adde %1,%1,r17 \n\t" \ " adde %1,%1,17 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","%cc"); :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc");
#define SQRADDSC(i, j) \ #define SQRADDSC(i, j) \
asm( \ asm( \
@ -351,12 +351,12 @@ asm( \
#define SQRADDAC(i, j) \ #define SQRADDAC(i, j) \
asm( \ asm( \
" mullw r16,%6,%7 \n\t" \ " mullw 16,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \ " addc %0,%0,16 \n\t" \
" mulhwu r16,%6,%7 \n\t" \ " mulhwu 16,%6,%7 \n\t" \
" adde %1,%1,r16 \n\t" \ " adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc"); :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc");
#define SQRADDDB \ #define SQRADDDB \
asm( \ asm( \

View File

@ -1,10 +1,14 @@
#makefile for TomsFastMath #makefile for TomsFastMath
# #
# #
VERSION=0.06 VERSION=0.07
CFLAGS += -Wall -W -Wshadow -I./ CFLAGS += -Wall -W -Wshadow -I./
ifndef MAKE
MAKE=make
endif
ifndef IGNORE_SPEED ifndef IGNORE_SPEED
CFLAGS += -O3 -funroll-all-loops CFLAGS += -O3 -funroll-all-loops
@ -85,7 +89,7 @@ install: $(LIBNAME)
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH) install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
mtest/mtest: mtest/mtest.c mtest/mtest: mtest/mtest.c
cd mtest ; CFLAGS="$(CFLAGS) -I../" make mtest cd mtest ; CFLAGS="$(CFLAGS) -I../" MAKE=${MAKE} ${MAKE} mtest
test: $(LIBNAME) demo/test.o mtest/mtest test: $(LIBNAME) demo/test.o mtest/mtest
$(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test $(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test
@ -128,7 +132,7 @@ clean:
rm -f `find . -type f | grep "[.]dpi" | xargs` rm -f `find . -type f | grep "[.]dpi" | xargs`
rm -rf `find . -type d | grep "[.]libs" | xargs` rm -rf `find . -type d | grep "[.]libs" | xargs`
rm -f tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.toc rm -f tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.toc
cd mtest ; make clean cd mtest ; MAKE=${MAKE} ${MAKE} clean
no_oops: clean no_oops: clean
cd .. ; cvs commit cd .. ; cvs commit
@ -140,8 +144,9 @@ zipup: no_oops docs clean
cd .. ; rm -rf tfm* tomsfastmath-$(VERSION) ; mkdir tomsfastmath-$(VERSION) ; \ cd .. ; rm -rf tfm* tomsfastmath-$(VERSION) ; mkdir tomsfastmath-$(VERSION) ; \
cp -R ./tomsfastmath/* ./tomsfastmath-$(VERSION)/ ; \ cp -R ./tomsfastmath/* ./tomsfastmath-$(VERSION)/ ; \
tar -c tomsfastmath-$(VERSION)/* | bzip2 -9vvc > tfm-$(VERSION).tar.bz2 ; \ tar -c tomsfastmath-$(VERSION)/* | bzip2 -9vvc > tfm-$(VERSION).tar.bz2 ; \
zip -9r tfm-$(VERSION).zip tomsfastmath-$(VERSION)/* zip -9r tfm-$(VERSION).zip tomsfastmath-$(VERSION)/* ; \
mv -f tfm* ~ ; rm -rf tomsfastmath-$(VERSION)
# $Source: /cvs/libtom/tomsfastmath/makefile,v $ # $Source: /cvs/libtom/tomsfastmath/makefile,v $
# $Revision: 1.19 $ # $Revision: 1.23 $
# $Date: 2005/08/25 23:53:40 $ # $Date: 2005/11/18 06:13:57 $

View File

@ -1,7 +1,7 @@
#makefile for TomsFastMath #makefile for TomsFastMath
# #
# #
VERSION=0:6 VERSION=0:7
CC=libtool --mode=compile gcc CC=libtool --mode=compile gcc
@ -104,6 +104,6 @@ stest: $(LIBNAME) demo/stest.o
$(CC) $(CFLAGS) demo/stest.o $(LIBNAME_S) -o stest $(CC) $(CFLAGS) demo/stest.o $(LIBNAME_S) -o stest
# $Source: /cvs/libtom/tomsfastmath/makefile.shared,v $ # $Source: /cvs/libtom/tomsfastmath/makefile.shared,v $
# $Revision: 1.7 $ # $Revision: 1.8 $
# $Date: 2005/10/06 23:31:17 $ # $Date: 2005/10/31 17:26:00 $

View File

@ -1951,24 +1951,24 @@ asm( \
#define INNERMUL \ #define INNERMUL \
asm( \ asm( \
" mullw r16,%3,%4 \n\t" \ " mullw 16,%3,%4 \n\t" \
" mulhwu r17,%3,%4 \n\t" \ " mulhwu 17,%3,%4 \n\t" \
" addc r16,r16,%0 \n\t" \ " addc 16,16,%0 \n\t" \
" addze r17,r17 \n\t" \ " addze 17,17 \n\t" \
" lwz r18,%1 \n\t" \ " lwz 18,%1 \n\t" \
" addc r16,r16,r18 \n\t" \ " addc 16,16,18 \n\t" \
" addze %0,r17 \n\t" \ " addze %0,17 \n\t" \
" stw r16,%1 \n\t" \ " stw 16,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r16", "r17", "r18","%cc"); :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm;
#define PROPCARRY \ #define PROPCARRY \
asm( \ asm( \
" lwz r16,%1 \n\t" \ " lwz 16,%1 \n\t" \
" addc r16,r16,%0 \n\t" \ " addc 16,16,%0 \n\t" \
" stw r16,%1 \n\t" \ " stw 16,%1 \n\t" \
" xor %0,%0,%0 \n\t" \ " xor %0,%0,%0 \n\t" \
" addze %0,%0 \n\t" \ " addze %0,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc"); :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc");
/******************************************************************/ /******************************************************************/
#else #else
@ -2169,28 +2169,28 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
#ifdef TFM_SMALL_SET #ifdef TFM_SMALL_SET
if (y <= 16) { if (y <= 16) {
fp_mul_comba_small(A,B,C); fp_mul_comba_small(A,B,C);
#elif defined(TFM_HUGE) return;
if (0) { 1; }
#endif #endif
#if defined(TFM_MUL32) #if defined(TFM_MUL32)
} else if (y <= 32) { if (yy >= 24 && y <= 32) {
fp_mul_comba32(A,B,C); fp_mul_comba32(A,B,C);
return;
}
#endif #endif
#if defined(TFM_MUL48) #if defined(TFM_MUL48)
} else if (y <= 48) { if (yy >= 40 && y <= 48) {
fp_mul_comba48(A,B,C); fp_mul_comba48(A,B,C);
return;
}
#endif #endif
#if defined(TFM_MUL64) #if defined(TFM_MUL64)
} else if (y <= 64) { if (yy >= 56 && y <= 64) {
fp_mul_comba64(A,B,C); fp_mul_comba64(A,B,C);
#endif return;
#if !defined(TFM_HUGE) && !defined(TFM_SMALL_SET) }
{
#else
} else {
#endif #endif
fp_mul_comba(A,B,C); fp_mul_comba(A,B,C);
}
} else { } else {
/* do the karatsuba action /* do the karatsuba action
@ -2596,12 +2596,12 @@ asm( \
/* untested: will mulhwu change the flags? Docs say no */ /* untested: will mulhwu change the flags? Docs say no */
#define MULADD(i, j) \ #define MULADD(i, j) \
asm( \ asm( \
" mullw r16,%6,%7 \n\t" \ " mullw 16,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \ " addc %0,%0,16 \n\t" \
" mulhwu r16,%6,%7 \n\t" \ " mulhwu 16,%6,%7 \n\t" \
" adde %1,%1,r16 \n\t" \ " adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16"); :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
#else #else
/* ISO C code */ /* ISO C code */
@ -5740,29 +5740,28 @@ void fp_sqr(fp_int *A, fp_int *B)
#if defined(TFM_SMALL_SET) #if defined(TFM_SMALL_SET)
if (y <= 16) { if (y <= 16) {
fp_sqr_comba_small(A,B); fp_sqr_comba_small(A,B);
#elif defined(TFM_HUGE) return;
if (0) { 1; }
#endif #endif
#if defined(TFM_SQR32) #if defined(TFM_SQR32)
} else if (y <= 32) { if (y <= 32) {
fp_sqr_comba32(A,B); fp_sqr_comba32(A,B);
return;
}
#endif #endif
#if defined(TFM_SQR48) #if defined(TFM_SQR48)
} else if (y <= 48) { if (y <= 48) {
fp_sqr_comba48(A,B); fp_sqr_comba48(A,B);
return;
}
#endif #endif
#if defined(TFM_SQR64) #if defined(TFM_SQR64)
} else if (y <= 64) { if (y <= 64) {
fp_sqr_comba64(A,B); fp_sqr_comba64(A,B);
#endif return;
#if !defined(TFM_SMALL_SET) && !defined(TFM_HUGE) }
{
#else
} else {
#endif #endif
fp_sqr_comba(A, B); fp_sqr_comba(A, B);
}
} else { } else {
/* do the karatsuba action /* do the karatsuba action
@ -6150,7 +6149,7 @@ asm( \
/* PPC32 */ /* PPC32 */
#define COMBA_START \ #define COMBA_START
#define CLEAR_CARRY \ #define CLEAR_CARRY \
c0 = c1 = c2 = 0; c0 = c1 = c2 = 0;
@ -6164,30 +6163,30 @@ asm( \
#define CARRY_FORWARD \ #define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0); do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI \ #define COMBA_FINI
/* multiplies point i and j, updates carry "c1" and digit c2 */ /* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \ #define SQRADD(i, j) \
asm( \ asm( \
" mullw r16,%6,%6 \n\t" \ " mullw 16,%6,%6 \n\t" \
" addc %0,%0,r16 \n\t" \ " addc %0,%0,16 \n\t" \
" mulhwu r16,%6,%6 \n\t" \ " mulhwu 16,%6,%6 \n\t" \
" adde %1,%1,r16 \n\t" \ " adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","%cc"); :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc");
/* for squaring some of the terms are doubled... */ /* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \ #define SQRADD2(i, j) \
asm( \ asm( \
" mullw r16,%6,%7 \n\t" \ " mullw 16,%6,%7 \n\t" \
" mulhwu r17,%6,%7 \n\t" \ " mulhwu 17,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \ " addc %0,%0,16 \n\t" \
" adde %1,%1,r17 \n\t" \ " adde %1,%1,17 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
" addc %0,%0,r16 \n\t" \ " addc %0,%0,16 \n\t" \
" adde %1,%1,r17 \n\t" \ " adde %1,%1,17 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","%cc"); :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc");
#define SQRADDSC(i, j) \ #define SQRADDSC(i, j) \
asm( \ asm( \
@ -6198,12 +6197,12 @@ asm( \
#define SQRADDAC(i, j) \ #define SQRADDAC(i, j) \
asm( \ asm( \
" mullw r16,%6,%7 \n\t" \ " mullw 16,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \ " addc %0,%0,16 \n\t" \
" mulhwu r16,%6,%7 \n\t" \ " mulhwu 16,%6,%7 \n\t" \
" adde %1,%1,r16 \n\t" \ " adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc"); :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc");
#define SQRADDDB \ #define SQRADDDB \
asm( \ asm( \

BIN
tfm.dvi

Binary file not shown.

12
tfm.h
View File

@ -372,9 +372,13 @@ void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C);
void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C); void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C);
#endif #endif
#ifdef TFM_HUGE #ifdef TFM_MUL32
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C); void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL48
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C); void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL64
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C); void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C);
#endif #endif
@ -384,9 +388,13 @@ void fp_sqr_comba(fp_int *A, fp_int *B);
void fp_sqr_comba_small(fp_int *A, fp_int *B); void fp_sqr_comba_small(fp_int *A, fp_int *B);
#endif #endif
#ifdef TFM_HUGE #ifdef TFM_SQR32
void fp_sqr_comba32(fp_int *A, fp_int *B); void fp_sqr_comba32(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR48
void fp_sqr_comba48(fp_int *A, fp_int *B); void fp_sqr_comba48(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR64
void fp_sqr_comba64(fp_int *A, fp_int *B); void fp_sqr_comba64(fp_int *A, fp_int *B);
#endif #endif
extern const char *fp_s_rmap; extern const char *fp_s_rmap;

View File

@ -1,4 +1,4 @@
This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.9.20) 31 OCT 2005 11:30 This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.11.6) 18 NOV 2005 06:14
entering extended mode entering extended mode
**tfm **tfm
(./tfm.tex (./tfm.tex
@ -329,4 +329,4 @@ Here is how much of TeX's memory you used:
580 hyphenation exceptions out of 1000 580 hyphenation exceptions out of 1000
25i,9n,25p,195b,321s stack positions out of 1500i,500n,1500p,200000b,5000s 25i,9n,25p,195b,321s stack positions out of 1500i,500n,1500p,200000b,5000s
Output written on tfm.dvi (25 pages, 51616 bytes). Output written on tfm.dvi (25 pages, 51624 bytes).

View File

@ -49,7 +49,7 @@
\begin{document} \begin{document}
\frontmatter \frontmatter
\pagestyle{empty} \pagestyle{empty}
\title{TomsFastMath User Manual \\ v0.06} \title{TomsFastMath User Manual \\ v0.07}
\author{Tom St Denis \\ tomstdenis@gmail.com} \author{Tom St Denis \\ tomstdenis@gmail.com}
\maketitle \maketitle
This text and library are all hereby placed in the public domain. This book has been formatted for B5 This text and library are all hereby placed in the public domain. This book has been formatted for B5