added tomsfastmath-0.07
This commit is contained in:
parent
091b337fe8
commit
9ce1fe4656
@ -1,3 +1,8 @@
|
|||||||
|
November 18th, 2005
|
||||||
|
0.07 -- Fixes to fp_mul and fp_sqr to clean up the handling of the defines, fix to tfm.h to also clear up the prototypes.
|
||||||
|
-- Updates to build and run on an IBM PPC 405 [using GCC 3.4.4]
|
||||||
|
-- Made the "make" command renamable in the build system
|
||||||
|
|
||||||
October 31st, 2005
|
October 31st, 2005
|
||||||
0.06 -- fixed fp_mul() and fp_sqr() to trim digits when overflows would occur. Produces numerically imprecise results
|
0.06 -- fixed fp_mul() and fp_sqr() to trim digits when overflows would occur. Produces numerically imprecise results
|
||||||
(e.g. the lower FP_SIZE digits) but shouldn't segfault at least ;-)
|
(e.g. the lower FP_SIZE digits) but shouldn't segfault at least ;-)
|
||||||
|
@ -23,7 +23,7 @@ static ulong64 TIMFUNC (void)
|
|||||||
{
|
{
|
||||||
#if defined __GNUC__
|
#if defined __GNUC__
|
||||||
#if defined(INTEL_CC)
|
#if defined(INTEL_CC)
|
||||||
ulong64 a;
|
ulong64 a;
|
||||||
asm ("rdtsc":"=A"(a));
|
asm ("rdtsc":"=A"(a));
|
||||||
return a;
|
return a;
|
||||||
#elif defined(__i386__) || defined(__x86_64__)
|
#elif defined(__i386__) || defined(__x86_64__)
|
||||||
@ -31,9 +31,9 @@ static ulong64 TIMFUNC (void)
|
|||||||
__asm__ __volatile__ ("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::"m"(a):"%eax","%edx");
|
__asm__ __volatile__ ("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::"m"(a):"%eax","%edx");
|
||||||
return a;
|
return a;
|
||||||
#elif defined(TFM_PPC32)
|
#elif defined(TFM_PPC32)
|
||||||
unsigned long a;
|
unsigned long a, b;
|
||||||
__asm__ __volatile__ ("mftb %0":"=r"(a));
|
__asm__ __volatile__ ("mftbu %1 \nmftb %0\n":"=r"(a), "=r"(b));
|
||||||
return a;
|
return (((ulong64)b) << 32ULL) | ((ulong64)a);
|
||||||
#else /* gcc-IA64 version */
|
#else /* gcc-IA64 version */
|
||||||
unsigned long result;
|
unsigned long result;
|
||||||
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
|
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
|
||||||
|
BIN
doc/tfm.pdf
BIN
doc/tfm.pdf
Binary file not shown.
@ -259,24 +259,24 @@ asm( \
|
|||||||
|
|
||||||
#define INNERMUL \
|
#define INNERMUL \
|
||||||
asm( \
|
asm( \
|
||||||
" mullw r16,%3,%4 \n\t" \
|
" mullw 16,%3,%4 \n\t" \
|
||||||
" mulhwu r17,%3,%4 \n\t" \
|
" mulhwu 17,%3,%4 \n\t" \
|
||||||
" addc r16,r16,%0 \n\t" \
|
" addc 16,16,%0 \n\t" \
|
||||||
" addze r17,r17 \n\t" \
|
" addze 17,17 \n\t" \
|
||||||
" lwz r18,%1 \n\t" \
|
" lwz 18,%1 \n\t" \
|
||||||
" addc r16,r16,r18 \n\t" \
|
" addc 16,16,18 \n\t" \
|
||||||
" addze %0,r17 \n\t" \
|
" addze %0,17 \n\t" \
|
||||||
" stw r16,%1 \n\t" \
|
" stw 16,%1 \n\t" \
|
||||||
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r16", "r17", "r18","%cc");
|
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm;
|
||||||
|
|
||||||
#define PROPCARRY \
|
#define PROPCARRY \
|
||||||
asm( \
|
asm( \
|
||||||
" lwz r16,%1 \n\t" \
|
" lwz 16,%1 \n\t" \
|
||||||
" addc r16,r16,%0 \n\t" \
|
" addc 16,16,%0 \n\t" \
|
||||||
" stw r16,%1 \n\t" \
|
" stw 16,%1 \n\t" \
|
||||||
" xor %0,%0,%0 \n\t" \
|
" xor %0,%0,%0 \n\t" \
|
||||||
" addze %0,%0 \n\t" \
|
" addze %0,%0 \n\t" \
|
||||||
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc");
|
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc");
|
||||||
|
|
||||||
/******************************************************************/
|
/******************************************************************/
|
||||||
#else
|
#else
|
||||||
|
32
fp_mul.c
32
fp_mul.c
@ -34,28 +34,28 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
|
|||||||
#ifdef TFM_SMALL_SET
|
#ifdef TFM_SMALL_SET
|
||||||
if (y <= 16) {
|
if (y <= 16) {
|
||||||
fp_mul_comba_small(A,B,C);
|
fp_mul_comba_small(A,B,C);
|
||||||
#elif defined(TFM_HUGE)
|
return;
|
||||||
if (0) { 1;
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(TFM_MUL32)
|
#if defined(TFM_MUL32)
|
||||||
} else if (y <= 32) {
|
if (yy >= 24 && y <= 32) {
|
||||||
fp_mul_comba32(A,B,C);
|
fp_mul_comba32(A,B,C);
|
||||||
|
return;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(TFM_MUL48)
|
#if defined(TFM_MUL48)
|
||||||
} else if (y <= 48) {
|
if (yy >= 40 && y <= 48) {
|
||||||
fp_mul_comba48(A,B,C);
|
fp_mul_comba48(A,B,C);
|
||||||
#endif
|
return;
|
||||||
#if defined(TFM_MUL64)
|
|
||||||
} else if (y <= 64) {
|
|
||||||
fp_mul_comba64(A,B,C);
|
|
||||||
#endif
|
|
||||||
#if !defined(TFM_HUGE) && !defined(TFM_SMALL_SET)
|
|
||||||
{
|
|
||||||
#else
|
|
||||||
} else {
|
|
||||||
#endif
|
|
||||||
fp_mul_comba(A,B,C);
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(TFM_MUL64)
|
||||||
|
if (yy >= 56 && y <= 64) {
|
||||||
|
fp_mul_comba64(A,B,C);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
fp_mul_comba(A,B,C);
|
||||||
} else {
|
} else {
|
||||||
/* do the karatsuba action
|
/* do the karatsuba action
|
||||||
|
|
||||||
|
@ -179,12 +179,12 @@ asm( \
|
|||||||
/* untested: will mulhwu change the flags? Docs say no */
|
/* untested: will mulhwu change the flags? Docs say no */
|
||||||
#define MULADD(i, j) \
|
#define MULADD(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
" mullw r16,%6,%7 \n\t" \
|
" mullw 16,%6,%7 \n\t" \
|
||||||
" addc %0,%0,r16 \n\t" \
|
" addc %0,%0,16 \n\t" \
|
||||||
" mulhwu r16,%6,%7 \n\t" \
|
" mulhwu 16,%6,%7 \n\t" \
|
||||||
" adde %1,%1,r16 \n\t" \
|
" adde %1,%1,16 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16");
|
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
|
||||||
|
|
||||||
#else
|
#else
|
||||||
/* ISO C code */
|
/* ISO C code */
|
||||||
|
25
fp_sqr.c
25
fp_sqr.c
@ -27,29 +27,28 @@ void fp_sqr(fp_int *A, fp_int *B)
|
|||||||
#if defined(TFM_SMALL_SET)
|
#if defined(TFM_SMALL_SET)
|
||||||
if (y <= 16) {
|
if (y <= 16) {
|
||||||
fp_sqr_comba_small(A,B);
|
fp_sqr_comba_small(A,B);
|
||||||
#elif defined(TFM_HUGE)
|
return;
|
||||||
if (0) { 1;
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(TFM_SQR32)
|
#if defined(TFM_SQR32)
|
||||||
} else if (y <= 32) {
|
if (y <= 32) {
|
||||||
fp_sqr_comba32(A,B);
|
fp_sqr_comba32(A,B);
|
||||||
|
return;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(TFM_SQR48)
|
#if defined(TFM_SQR48)
|
||||||
} else if (y <= 48) {
|
if (y <= 48) {
|
||||||
fp_sqr_comba48(A,B);
|
fp_sqr_comba48(A,B);
|
||||||
|
return;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(TFM_SQR64)
|
#if defined(TFM_SQR64)
|
||||||
} else if (y <= 64) {
|
if (y <= 64) {
|
||||||
fp_sqr_comba64(A,B);
|
fp_sqr_comba64(A,B);
|
||||||
#endif
|
return;
|
||||||
#if !defined(TFM_SMALL_SET) && !defined(TFM_HUGE)
|
|
||||||
{
|
|
||||||
#else
|
|
||||||
} else {
|
|
||||||
#endif
|
|
||||||
fp_sqr_comba(A, B);
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
fp_sqr_comba(A, B);
|
||||||
} else {
|
} else {
|
||||||
/* do the karatsuba action
|
/* do the karatsuba action
|
||||||
|
|
||||||
|
@ -303,7 +303,7 @@ asm( \
|
|||||||
|
|
||||||
/* PPC32 */
|
/* PPC32 */
|
||||||
|
|
||||||
#define COMBA_START \
|
#define COMBA_START
|
||||||
|
|
||||||
#define CLEAR_CARRY \
|
#define CLEAR_CARRY \
|
||||||
c0 = c1 = c2 = 0;
|
c0 = c1 = c2 = 0;
|
||||||
@ -317,30 +317,30 @@ asm( \
|
|||||||
#define CARRY_FORWARD \
|
#define CARRY_FORWARD \
|
||||||
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
|
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
|
||||||
|
|
||||||
#define COMBA_FINI \
|
#define COMBA_FINI
|
||||||
|
|
||||||
/* multiplies point i and j, updates carry "c1" and digit c2 */
|
/* multiplies point i and j, updates carry "c1" and digit c2 */
|
||||||
#define SQRADD(i, j) \
|
#define SQRADD(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
" mullw r16,%6,%6 \n\t" \
|
" mullw 16,%6,%6 \n\t" \
|
||||||
" addc %0,%0,r16 \n\t" \
|
" addc %0,%0,16 \n\t" \
|
||||||
" mulhwu r16,%6,%6 \n\t" \
|
" mulhwu 16,%6,%6 \n\t" \
|
||||||
" adde %1,%1,r16 \n\t" \
|
" adde %1,%1,16 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc");
|
||||||
|
|
||||||
/* for squaring some of the terms are doubled... */
|
/* for squaring some of the terms are doubled... */
|
||||||
#define SQRADD2(i, j) \
|
#define SQRADD2(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
" mullw r16,%6,%7 \n\t" \
|
" mullw 16,%6,%7 \n\t" \
|
||||||
" mulhwu r17,%6,%7 \n\t" \
|
" mulhwu 17,%6,%7 \n\t" \
|
||||||
" addc %0,%0,r16 \n\t" \
|
" addc %0,%0,16 \n\t" \
|
||||||
" adde %1,%1,r17 \n\t" \
|
" adde %1,%1,17 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
" addc %0,%0,r16 \n\t" \
|
" addc %0,%0,16 \n\t" \
|
||||||
" adde %1,%1,r17 \n\t" \
|
" adde %1,%1,17 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc");
|
||||||
|
|
||||||
#define SQRADDSC(i, j) \
|
#define SQRADDSC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
@ -349,14 +349,14 @@ asm( \
|
|||||||
" xor %2,%2,%2 \n\t" \
|
" xor %2,%2,%2 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
|
||||||
|
|
||||||
#define SQRADDAC(i, j) \
|
#define SQRADDAC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
" mullw r16,%6,%7 \n\t" \
|
" mullw 16,%6,%7 \n\t" \
|
||||||
" addc %0,%0,r16 \n\t" \
|
" addc %0,%0,16 \n\t" \
|
||||||
" mulhwu r16,%6,%7 \n\t" \
|
" mulhwu 16,%6,%7 \n\t" \
|
||||||
" adde %1,%1,r16 \n\t" \
|
" adde %1,%1,16 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc");
|
||||||
|
|
||||||
#define SQRADDDB \
|
#define SQRADDDB \
|
||||||
asm( \
|
asm( \
|
||||||
|
17
makefile
17
makefile
@ -1,10 +1,14 @@
|
|||||||
#makefile for TomsFastMath
|
#makefile for TomsFastMath
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
VERSION=0.06
|
VERSION=0.07
|
||||||
|
|
||||||
CFLAGS += -Wall -W -Wshadow -I./
|
CFLAGS += -Wall -W -Wshadow -I./
|
||||||
|
|
||||||
|
ifndef MAKE
|
||||||
|
MAKE=make
|
||||||
|
endif
|
||||||
|
|
||||||
ifndef IGNORE_SPEED
|
ifndef IGNORE_SPEED
|
||||||
|
|
||||||
CFLAGS += -O3 -funroll-all-loops
|
CFLAGS += -O3 -funroll-all-loops
|
||||||
@ -85,7 +89,7 @@ install: $(LIBNAME)
|
|||||||
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
|
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
|
||||||
|
|
||||||
mtest/mtest: mtest/mtest.c
|
mtest/mtest: mtest/mtest.c
|
||||||
cd mtest ; CFLAGS="$(CFLAGS) -I../" make mtest
|
cd mtest ; CFLAGS="$(CFLAGS) -I../" MAKE=${MAKE} ${MAKE} mtest
|
||||||
|
|
||||||
test: $(LIBNAME) demo/test.o mtest/mtest
|
test: $(LIBNAME) demo/test.o mtest/mtest
|
||||||
$(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test
|
$(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test
|
||||||
@ -128,7 +132,7 @@ clean:
|
|||||||
rm -f `find . -type f | grep "[.]dpi" | xargs`
|
rm -f `find . -type f | grep "[.]dpi" | xargs`
|
||||||
rm -rf `find . -type d | grep "[.]libs" | xargs`
|
rm -rf `find . -type d | grep "[.]libs" | xargs`
|
||||||
rm -f tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.toc
|
rm -f tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.toc
|
||||||
cd mtest ; make clean
|
cd mtest ; MAKE=${MAKE} ${MAKE} clean
|
||||||
|
|
||||||
no_oops: clean
|
no_oops: clean
|
||||||
cd .. ; cvs commit
|
cd .. ; cvs commit
|
||||||
@ -140,8 +144,9 @@ zipup: no_oops docs clean
|
|||||||
cd .. ; rm -rf tfm* tomsfastmath-$(VERSION) ; mkdir tomsfastmath-$(VERSION) ; \
|
cd .. ; rm -rf tfm* tomsfastmath-$(VERSION) ; mkdir tomsfastmath-$(VERSION) ; \
|
||||||
cp -R ./tomsfastmath/* ./tomsfastmath-$(VERSION)/ ; \
|
cp -R ./tomsfastmath/* ./tomsfastmath-$(VERSION)/ ; \
|
||||||
tar -c tomsfastmath-$(VERSION)/* | bzip2 -9vvc > tfm-$(VERSION).tar.bz2 ; \
|
tar -c tomsfastmath-$(VERSION)/* | bzip2 -9vvc > tfm-$(VERSION).tar.bz2 ; \
|
||||||
zip -9r tfm-$(VERSION).zip tomsfastmath-$(VERSION)/*
|
zip -9r tfm-$(VERSION).zip tomsfastmath-$(VERSION)/* ; \
|
||||||
|
mv -f tfm* ~ ; rm -rf tomsfastmath-$(VERSION)
|
||||||
|
|
||||||
# $Source: /cvs/libtom/tomsfastmath/makefile,v $
|
# $Source: /cvs/libtom/tomsfastmath/makefile,v $
|
||||||
# $Revision: 1.19 $
|
# $Revision: 1.23 $
|
||||||
# $Date: 2005/08/25 23:53:40 $
|
# $Date: 2005/11/18 06:13:57 $
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#makefile for TomsFastMath
|
#makefile for TomsFastMath
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
VERSION=0:6
|
VERSION=0:7
|
||||||
|
|
||||||
CC=libtool --mode=compile gcc
|
CC=libtool --mode=compile gcc
|
||||||
|
|
||||||
@ -104,6 +104,6 @@ stest: $(LIBNAME) demo/stest.o
|
|||||||
$(CC) $(CFLAGS) demo/stest.o $(LIBNAME_S) -o stest
|
$(CC) $(CFLAGS) demo/stest.o $(LIBNAME_S) -o stest
|
||||||
|
|
||||||
# $Source: /cvs/libtom/tomsfastmath/makefile.shared,v $
|
# $Source: /cvs/libtom/tomsfastmath/makefile.shared,v $
|
||||||
# $Revision: 1.7 $
|
# $Revision: 1.8 $
|
||||||
# $Date: 2005/10/06 23:31:17 $
|
# $Date: 2005/10/31 17:26:00 $
|
||||||
|
|
||||||
|
155
pre_gen/mpi.c
155
pre_gen/mpi.c
@ -1951,24 +1951,24 @@ asm( \
|
|||||||
|
|
||||||
#define INNERMUL \
|
#define INNERMUL \
|
||||||
asm( \
|
asm( \
|
||||||
" mullw r16,%3,%4 \n\t" \
|
" mullw 16,%3,%4 \n\t" \
|
||||||
" mulhwu r17,%3,%4 \n\t" \
|
" mulhwu 17,%3,%4 \n\t" \
|
||||||
" addc r16,r16,%0 \n\t" \
|
" addc 16,16,%0 \n\t" \
|
||||||
" addze r17,r17 \n\t" \
|
" addze 17,17 \n\t" \
|
||||||
" lwz r18,%1 \n\t" \
|
" lwz 18,%1 \n\t" \
|
||||||
" addc r16,r16,r18 \n\t" \
|
" addc 16,16,18 \n\t" \
|
||||||
" addze %0,r17 \n\t" \
|
" addze %0,17 \n\t" \
|
||||||
" stw r16,%1 \n\t" \
|
" stw 16,%1 \n\t" \
|
||||||
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r16", "r17", "r18","%cc");
|
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm;
|
||||||
|
|
||||||
#define PROPCARRY \
|
#define PROPCARRY \
|
||||||
asm( \
|
asm( \
|
||||||
" lwz r16,%1 \n\t" \
|
" lwz 16,%1 \n\t" \
|
||||||
" addc r16,r16,%0 \n\t" \
|
" addc 16,16,%0 \n\t" \
|
||||||
" stw r16,%1 \n\t" \
|
" stw 16,%1 \n\t" \
|
||||||
" xor %0,%0,%0 \n\t" \
|
" xor %0,%0,%0 \n\t" \
|
||||||
" addze %0,%0 \n\t" \
|
" addze %0,%0 \n\t" \
|
||||||
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc");
|
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc");
|
||||||
|
|
||||||
/******************************************************************/
|
/******************************************************************/
|
||||||
#else
|
#else
|
||||||
@ -2169,28 +2169,28 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
|
|||||||
#ifdef TFM_SMALL_SET
|
#ifdef TFM_SMALL_SET
|
||||||
if (y <= 16) {
|
if (y <= 16) {
|
||||||
fp_mul_comba_small(A,B,C);
|
fp_mul_comba_small(A,B,C);
|
||||||
#elif defined(TFM_HUGE)
|
return;
|
||||||
if (0) { 1;
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(TFM_MUL32)
|
#if defined(TFM_MUL32)
|
||||||
} else if (y <= 32) {
|
if (yy >= 24 && y <= 32) {
|
||||||
fp_mul_comba32(A,B,C);
|
fp_mul_comba32(A,B,C);
|
||||||
|
return;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(TFM_MUL48)
|
#if defined(TFM_MUL48)
|
||||||
} else if (y <= 48) {
|
if (yy >= 40 && y <= 48) {
|
||||||
fp_mul_comba48(A,B,C);
|
fp_mul_comba48(A,B,C);
|
||||||
#endif
|
return;
|
||||||
#if defined(TFM_MUL64)
|
|
||||||
} else if (y <= 64) {
|
|
||||||
fp_mul_comba64(A,B,C);
|
|
||||||
#endif
|
|
||||||
#if !defined(TFM_HUGE) && !defined(TFM_SMALL_SET)
|
|
||||||
{
|
|
||||||
#else
|
|
||||||
} else {
|
|
||||||
#endif
|
|
||||||
fp_mul_comba(A,B,C);
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(TFM_MUL64)
|
||||||
|
if (yy >= 56 && y <= 64) {
|
||||||
|
fp_mul_comba64(A,B,C);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
fp_mul_comba(A,B,C);
|
||||||
} else {
|
} else {
|
||||||
/* do the karatsuba action
|
/* do the karatsuba action
|
||||||
|
|
||||||
@ -2596,12 +2596,12 @@ asm( \
|
|||||||
/* untested: will mulhwu change the flags? Docs say no */
|
/* untested: will mulhwu change the flags? Docs say no */
|
||||||
#define MULADD(i, j) \
|
#define MULADD(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
" mullw r16,%6,%7 \n\t" \
|
" mullw 16,%6,%7 \n\t" \
|
||||||
" addc %0,%0,r16 \n\t" \
|
" addc %0,%0,16 \n\t" \
|
||||||
" mulhwu r16,%6,%7 \n\t" \
|
" mulhwu 16,%6,%7 \n\t" \
|
||||||
" adde %1,%1,r16 \n\t" \
|
" adde %1,%1,16 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16");
|
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
|
||||||
|
|
||||||
#else
|
#else
|
||||||
/* ISO C code */
|
/* ISO C code */
|
||||||
@ -5740,29 +5740,28 @@ void fp_sqr(fp_int *A, fp_int *B)
|
|||||||
#if defined(TFM_SMALL_SET)
|
#if defined(TFM_SMALL_SET)
|
||||||
if (y <= 16) {
|
if (y <= 16) {
|
||||||
fp_sqr_comba_small(A,B);
|
fp_sqr_comba_small(A,B);
|
||||||
#elif defined(TFM_HUGE)
|
return;
|
||||||
if (0) { 1;
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(TFM_SQR32)
|
#if defined(TFM_SQR32)
|
||||||
} else if (y <= 32) {
|
if (y <= 32) {
|
||||||
fp_sqr_comba32(A,B);
|
fp_sqr_comba32(A,B);
|
||||||
|
return;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(TFM_SQR48)
|
#if defined(TFM_SQR48)
|
||||||
} else if (y <= 48) {
|
if (y <= 48) {
|
||||||
fp_sqr_comba48(A,B);
|
fp_sqr_comba48(A,B);
|
||||||
|
return;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(TFM_SQR64)
|
#if defined(TFM_SQR64)
|
||||||
} else if (y <= 64) {
|
if (y <= 64) {
|
||||||
fp_sqr_comba64(A,B);
|
fp_sqr_comba64(A,B);
|
||||||
#endif
|
return;
|
||||||
#if !defined(TFM_SMALL_SET) && !defined(TFM_HUGE)
|
|
||||||
{
|
|
||||||
#else
|
|
||||||
} else {
|
|
||||||
#endif
|
|
||||||
fp_sqr_comba(A, B);
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
fp_sqr_comba(A, B);
|
||||||
} else {
|
} else {
|
||||||
/* do the karatsuba action
|
/* do the karatsuba action
|
||||||
|
|
||||||
@ -6150,7 +6149,7 @@ asm( \
|
|||||||
|
|
||||||
/* PPC32 */
|
/* PPC32 */
|
||||||
|
|
||||||
#define COMBA_START \
|
#define COMBA_START
|
||||||
|
|
||||||
#define CLEAR_CARRY \
|
#define CLEAR_CARRY \
|
||||||
c0 = c1 = c2 = 0;
|
c0 = c1 = c2 = 0;
|
||||||
@ -6164,30 +6163,30 @@ asm( \
|
|||||||
#define CARRY_FORWARD \
|
#define CARRY_FORWARD \
|
||||||
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
|
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
|
||||||
|
|
||||||
#define COMBA_FINI \
|
#define COMBA_FINI
|
||||||
|
|
||||||
/* multiplies point i and j, updates carry "c1" and digit c2 */
|
/* multiplies point i and j, updates carry "c1" and digit c2 */
|
||||||
#define SQRADD(i, j) \
|
#define SQRADD(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
" mullw r16,%6,%6 \n\t" \
|
" mullw 16,%6,%6 \n\t" \
|
||||||
" addc %0,%0,r16 \n\t" \
|
" addc %0,%0,16 \n\t" \
|
||||||
" mulhwu r16,%6,%6 \n\t" \
|
" mulhwu 16,%6,%6 \n\t" \
|
||||||
" adde %1,%1,r16 \n\t" \
|
" adde %1,%1,16 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc");
|
||||||
|
|
||||||
/* for squaring some of the terms are doubled... */
|
/* for squaring some of the terms are doubled... */
|
||||||
#define SQRADD2(i, j) \
|
#define SQRADD2(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
" mullw r16,%6,%7 \n\t" \
|
" mullw 16,%6,%7 \n\t" \
|
||||||
" mulhwu r17,%6,%7 \n\t" \
|
" mulhwu 17,%6,%7 \n\t" \
|
||||||
" addc %0,%0,r16 \n\t" \
|
" addc %0,%0,16 \n\t" \
|
||||||
" adde %1,%1,r17 \n\t" \
|
" adde %1,%1,17 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
" addc %0,%0,r16 \n\t" \
|
" addc %0,%0,16 \n\t" \
|
||||||
" adde %1,%1,r17 \n\t" \
|
" adde %1,%1,17 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc");
|
||||||
|
|
||||||
#define SQRADDSC(i, j) \
|
#define SQRADDSC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
@ -6196,14 +6195,14 @@ asm( \
|
|||||||
" xor %2,%2,%2 \n\t" \
|
" xor %2,%2,%2 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
|
||||||
|
|
||||||
#define SQRADDAC(i, j) \
|
#define SQRADDAC(i, j) \
|
||||||
asm( \
|
asm( \
|
||||||
" mullw r16,%6,%7 \n\t" \
|
" mullw 16,%6,%7 \n\t" \
|
||||||
" addc %0,%0,r16 \n\t" \
|
" addc %0,%0,16 \n\t" \
|
||||||
" mulhwu r16,%6,%7 \n\t" \
|
" mulhwu 16,%6,%7 \n\t" \
|
||||||
" adde %1,%1,r16 \n\t" \
|
" adde %1,%1,16 \n\t" \
|
||||||
" addze %2,%2 \n\t" \
|
" addze %2,%2 \n\t" \
|
||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc");
|
||||||
|
|
||||||
#define SQRADDDB \
|
#define SQRADDDB \
|
||||||
asm( \
|
asm( \
|
||||||
|
12
tfm.h
12
tfm.h
@ -372,9 +372,13 @@ void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C);
|
|||||||
void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C);
|
void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef TFM_HUGE
|
#ifdef TFM_MUL32
|
||||||
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C);
|
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C);
|
||||||
|
#endif
|
||||||
|
#ifdef TFM_MUL48
|
||||||
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C);
|
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C);
|
||||||
|
#endif
|
||||||
|
#ifdef TFM_MUL64
|
||||||
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C);
|
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -384,9 +388,13 @@ void fp_sqr_comba(fp_int *A, fp_int *B);
|
|||||||
void fp_sqr_comba_small(fp_int *A, fp_int *B);
|
void fp_sqr_comba_small(fp_int *A, fp_int *B);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef TFM_HUGE
|
#ifdef TFM_SQR32
|
||||||
void fp_sqr_comba32(fp_int *A, fp_int *B);
|
void fp_sqr_comba32(fp_int *A, fp_int *B);
|
||||||
|
#endif
|
||||||
|
#ifdef TFM_SQR48
|
||||||
void fp_sqr_comba48(fp_int *A, fp_int *B);
|
void fp_sqr_comba48(fp_int *A, fp_int *B);
|
||||||
|
#endif
|
||||||
|
#ifdef TFM_SQR64
|
||||||
void fp_sqr_comba64(fp_int *A, fp_int *B);
|
void fp_sqr_comba64(fp_int *A, fp_int *B);
|
||||||
#endif
|
#endif
|
||||||
extern const char *fp_s_rmap;
|
extern const char *fp_s_rmap;
|
||||||
|
4
tfm.log
4
tfm.log
@ -1,4 +1,4 @@
|
|||||||
This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.9.20) 31 OCT 2005 11:30
|
This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.11.6) 18 NOV 2005 06:14
|
||||||
entering extended mode
|
entering extended mode
|
||||||
**tfm
|
**tfm
|
||||||
(./tfm.tex
|
(./tfm.tex
|
||||||
@ -329,4 +329,4 @@ Here is how much of TeX's memory you used:
|
|||||||
580 hyphenation exceptions out of 1000
|
580 hyphenation exceptions out of 1000
|
||||||
25i,9n,25p,195b,321s stack positions out of 1500i,500n,1500p,200000b,5000s
|
25i,9n,25p,195b,321s stack positions out of 1500i,500n,1500p,200000b,5000s
|
||||||
|
|
||||||
Output written on tfm.dvi (25 pages, 51616 bytes).
|
Output written on tfm.dvi (25 pages, 51624 bytes).
|
||||||
|
2
tfm.tex
2
tfm.tex
@ -49,7 +49,7 @@
|
|||||||
\begin{document}
|
\begin{document}
|
||||||
\frontmatter
|
\frontmatter
|
||||||
\pagestyle{empty}
|
\pagestyle{empty}
|
||||||
\title{TomsFastMath User Manual \\ v0.06}
|
\title{TomsFastMath User Manual \\ v0.07}
|
||||||
\author{Tom St Denis \\ tomstdenis@gmail.com}
|
\author{Tom St Denis \\ tomstdenis@gmail.com}
|
||||||
\maketitle
|
\maketitle
|
||||||
This text and library are all hereby placed in the public domain. This book has been formatted for B5
|
This text and library are all hereby placed in the public domain. This book has been formatted for B5
|
||||||
|
Loading…
Reference in New Issue
Block a user