added tomsfastmath-0.07

This commit is contained in:
Tom St Denis 2005-11-18 05:16:25 +00:00 committed by Steffen Jaeckel
parent 091b337fe8
commit 9ce1fe4656
15 changed files with 190 additions and 174 deletions

View File

@ -1,3 +1,8 @@
November 18th, 2005
0.07 -- Fixes to fp_mul and fp_sqr to clean up the handling of the defines, fix to tfm.h to also clear up the prototypes.
-- Updates to build and run on an IBM PPC 405 [using GCC 3.4.4]
-- Made the "make" command renamable in the build system
October 31st, 2005
0.06 -- fixed fp_mul() and fp_sqr() to trim digits when overflows would occur. Produces numerically imprecise results
(e.g. the lower FP_SIZE digits) but shouldn't segfault at least ;-)

View File

@ -23,7 +23,7 @@ static ulong64 TIMFUNC (void)
{
#if defined __GNUC__
#if defined(INTEL_CC)
ulong64 a;
ulong64 a;
asm ("rdtsc":"=A"(a));
return a;
#elif defined(__i386__) || defined(__x86_64__)
@ -31,9 +31,9 @@ static ulong64 TIMFUNC (void)
__asm__ __volatile__ ("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::"m"(a):"%eax","%edx");
return a;
#elif defined(TFM_PPC32)
unsigned long a;
__asm__ __volatile__ ("mftb %0":"=r"(a));
return a;
unsigned long a, b;
__asm__ __volatile__ ("mftbu %1 \nmftb %0\n":"=r"(a), "=r"(b));
return (((ulong64)b) << 32ULL) | ((ulong64)a);
#else /* gcc-IA64 version */
unsigned long result;
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");

Binary file not shown.

View File

@ -259,24 +259,24 @@ asm( \
#define INNERMUL \
asm( \
" mullw r16,%3,%4 \n\t" \
" mulhwu r17,%3,%4 \n\t" \
" addc r16,r16,%0 \n\t" \
" addze r17,r17 \n\t" \
" lwz r18,%1 \n\t" \
" addc r16,r16,r18 \n\t" \
" addze %0,r17 \n\t" \
" stw r16,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r16", "r17", "r18","%cc");
" mullw 16,%3,%4 \n\t" \
" mulhwu 17,%3,%4 \n\t" \
" addc 16,16,%0 \n\t" \
" addze 17,17 \n\t" \
" lwz 18,%1 \n\t" \
" addc 16,16,18 \n\t" \
" addze %0,17 \n\t" \
" stw 16,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm;
#define PROPCARRY \
asm( \
" lwz r16,%1 \n\t" \
" addc r16,r16,%0 \n\t" \
" stw r16,%1 \n\t" \
" xor %0,%0,%0 \n\t" \
" addze %0,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc");
" lwz 16,%1 \n\t" \
" addc 16,16,%0 \n\t" \
" stw 16,%1 \n\t" \
" xor %0,%0,%0 \n\t" \
" addze %0,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc");
/******************************************************************/
#else

View File

@ -34,28 +34,28 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
#ifdef TFM_SMALL_SET
if (y <= 16) {
fp_mul_comba_small(A,B,C);
#elif defined(TFM_HUGE)
if (0) { 1;
#endif
return;
}
#endif
#if defined(TFM_MUL32)
} else if (y <= 32) {
if (yy >= 24 && y <= 32) {
fp_mul_comba32(A,B,C);
return;
}
#endif
#if defined(TFM_MUL48)
} else if (y <= 48) {
if (yy >= 40 && y <= 48) {
fp_mul_comba48(A,B,C);
#endif
#if defined(TFM_MUL64)
} else if (y <= 64) {
fp_mul_comba64(A,B,C);
#endif
#if !defined(TFM_HUGE) && !defined(TFM_SMALL_SET)
{
#else
} else {
#endif
fp_mul_comba(A,B,C);
return;
}
#endif
#if defined(TFM_MUL64)
if (yy >= 56 && y <= 64) {
fp_mul_comba64(A,B,C);
return;
}
#endif
fp_mul_comba(A,B,C);
} else {
/* do the karatsuba action

View File

@ -179,12 +179,12 @@ asm( \
/* untested: will mulhwu change the flags? Docs say no */
#define MULADD(i, j) \
asm( \
" mullw r16,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \
" mulhwu r16,%6,%7 \n\t" \
" adde %1,%1,r16 \n\t" \
" mullw 16,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhwu 16,%6,%7 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16");
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
#else
/* ISO C code */

View File

@ -27,29 +27,28 @@ void fp_sqr(fp_int *A, fp_int *B)
#if defined(TFM_SMALL_SET)
if (y <= 16) {
fp_sqr_comba_small(A,B);
#elif defined(TFM_HUGE)
if (0) { 1;
return;
}
#endif
#if defined(TFM_SQR32)
} else if (y <= 32) {
if (y <= 32) {
fp_sqr_comba32(A,B);
return;
}
#endif
#if defined(TFM_SQR48)
} else if (y <= 48) {
if (y <= 48) {
fp_sqr_comba48(A,B);
return;
}
#endif
#if defined(TFM_SQR64)
} else if (y <= 64) {
if (y <= 64) {
fp_sqr_comba64(A,B);
#endif
#if !defined(TFM_SMALL_SET) && !defined(TFM_HUGE)
{
#else
} else {
#endif
fp_sqr_comba(A, B);
return;
}
#endif
fp_sqr_comba(A, B);
} else {
/* do the karatsuba action

View File

@ -303,7 +303,7 @@ asm( \
/* PPC32 */
#define COMBA_START \
#define COMBA_START
#define CLEAR_CARRY \
c0 = c1 = c2 = 0;
@ -317,30 +317,30 @@ asm( \
#define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI \
#define COMBA_FINI
/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
asm( \
" mullw r16,%6,%6 \n\t" \
" addc %0,%0,r16 \n\t" \
" mulhwu r16,%6,%6 \n\t" \
" adde %1,%1,r16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","%cc");
#define SQRADD(i, j) \
asm( \
" mullw 16,%6,%6 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhwu 16,%6,%6 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc");
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
asm( \
" mullw r16,%6,%7 \n\t" \
" mulhwu r17,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \
" adde %1,%1,r17 \n\t" \
" addze %2,%2 \n\t" \
" addc %0,%0,r16 \n\t" \
" adde %1,%1,r17 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","%cc");
#define SQRADD2(i, j) \
asm( \
" mullw 16,%6,%7 \n\t" \
" mulhwu 17,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" adde %1,%1,17 \n\t" \
" addze %2,%2 \n\t" \
" addc %0,%0,16 \n\t" \
" adde %1,%1,17 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc");
#define SQRADDSC(i, j) \
asm( \
@ -349,14 +349,14 @@ asm( \
" xor %2,%2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
#define SQRADDAC(i, j) \
asm( \
" mullw r16,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \
" mulhwu r16,%6,%7 \n\t" \
" adde %1,%1,r16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc");
#define SQRADDAC(i, j) \
asm( \
" mullw 16,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhwu 16,%6,%7 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc");
#define SQRADDDB \
asm( \

View File

@ -1,10 +1,14 @@
#makefile for TomsFastMath
#
#
VERSION=0.06
VERSION=0.07
CFLAGS += -Wall -W -Wshadow -I./
ifndef MAKE
MAKE=make
endif
ifndef IGNORE_SPEED
CFLAGS += -O3 -funroll-all-loops
@ -85,7 +89,7 @@ install: $(LIBNAME)
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
mtest/mtest: mtest/mtest.c
cd mtest ; CFLAGS="$(CFLAGS) -I../" make mtest
cd mtest ; CFLAGS="$(CFLAGS) -I../" MAKE=${MAKE} ${MAKE} mtest
test: $(LIBNAME) demo/test.o mtest/mtest
$(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test
@ -128,7 +132,7 @@ clean:
rm -f `find . -type f | grep "[.]dpi" | xargs`
rm -rf `find . -type d | grep "[.]libs" | xargs`
rm -f tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.toc
cd mtest ; make clean
cd mtest ; MAKE=${MAKE} ${MAKE} clean
no_oops: clean
cd .. ; cvs commit
@ -140,8 +144,9 @@ zipup: no_oops docs clean
cd .. ; rm -rf tfm* tomsfastmath-$(VERSION) ; mkdir tomsfastmath-$(VERSION) ; \
cp -R ./tomsfastmath/* ./tomsfastmath-$(VERSION)/ ; \
tar -c tomsfastmath-$(VERSION)/* | bzip2 -9vvc > tfm-$(VERSION).tar.bz2 ; \
zip -9r tfm-$(VERSION).zip tomsfastmath-$(VERSION)/*
zip -9r tfm-$(VERSION).zip tomsfastmath-$(VERSION)/* ; \
mv -f tfm* ~ ; rm -rf tomsfastmath-$(VERSION)
# $Source: /cvs/libtom/tomsfastmath/makefile,v $
# $Revision: 1.19 $
# $Date: 2005/08/25 23:53:40 $
# $Revision: 1.23 $
# $Date: 2005/11/18 06:13:57 $

View File

@ -1,7 +1,7 @@
#makefile for TomsFastMath
#
#
VERSION=0:6
VERSION=0:7
CC=libtool --mode=compile gcc
@ -104,6 +104,6 @@ stest: $(LIBNAME) demo/stest.o
$(CC) $(CFLAGS) demo/stest.o $(LIBNAME_S) -o stest
# $Source: /cvs/libtom/tomsfastmath/makefile.shared,v $
# $Revision: 1.7 $
# $Date: 2005/10/06 23:31:17 $
# $Revision: 1.8 $
# $Date: 2005/10/31 17:26:00 $

View File

@ -1951,24 +1951,24 @@ asm( \
#define INNERMUL \
asm( \
" mullw r16,%3,%4 \n\t" \
" mulhwu r17,%3,%4 \n\t" \
" addc r16,r16,%0 \n\t" \
" addze r17,r17 \n\t" \
" lwz r18,%1 \n\t" \
" addc r16,r16,r18 \n\t" \
" addze %0,r17 \n\t" \
" stw r16,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r16", "r17", "r18","%cc");
" mullw 16,%3,%4 \n\t" \
" mulhwu 17,%3,%4 \n\t" \
" addc 16,16,%0 \n\t" \
" addze 17,17 \n\t" \
" lwz 18,%1 \n\t" \
" addc 16,16,18 \n\t" \
" addze %0,17 \n\t" \
" stw 16,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm;
#define PROPCARRY \
asm( \
" lwz r16,%1 \n\t" \
" addc r16,r16,%0 \n\t" \
" stw r16,%1 \n\t" \
" xor %0,%0,%0 \n\t" \
" addze %0,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc");
" lwz 16,%1 \n\t" \
" addc 16,16,%0 \n\t" \
" stw 16,%1 \n\t" \
" xor %0,%0,%0 \n\t" \
" addze %0,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc");
/******************************************************************/
#else
@ -2169,28 +2169,28 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
#ifdef TFM_SMALL_SET
if (y <= 16) {
fp_mul_comba_small(A,B,C);
#elif defined(TFM_HUGE)
if (0) { 1;
#endif
return;
}
#endif
#if defined(TFM_MUL32)
} else if (y <= 32) {
if (yy >= 24 && y <= 32) {
fp_mul_comba32(A,B,C);
return;
}
#endif
#if defined(TFM_MUL48)
} else if (y <= 48) {
if (yy >= 40 && y <= 48) {
fp_mul_comba48(A,B,C);
#endif
#if defined(TFM_MUL64)
} else if (y <= 64) {
fp_mul_comba64(A,B,C);
#endif
#if !defined(TFM_HUGE) && !defined(TFM_SMALL_SET)
{
#else
} else {
#endif
fp_mul_comba(A,B,C);
return;
}
#endif
#if defined(TFM_MUL64)
if (yy >= 56 && y <= 64) {
fp_mul_comba64(A,B,C);
return;
}
#endif
fp_mul_comba(A,B,C);
} else {
/* do the karatsuba action
@ -2596,12 +2596,12 @@ asm( \
/* untested: will mulhwu change the flags? Docs say no */
#define MULADD(i, j) \
asm( \
" mullw r16,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \
" mulhwu r16,%6,%7 \n\t" \
" adde %1,%1,r16 \n\t" \
" mullw 16,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhwu 16,%6,%7 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16");
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
#else
/* ISO C code */
@ -5740,29 +5740,28 @@ void fp_sqr(fp_int *A, fp_int *B)
#if defined(TFM_SMALL_SET)
if (y <= 16) {
fp_sqr_comba_small(A,B);
#elif defined(TFM_HUGE)
if (0) { 1;
return;
}
#endif
#if defined(TFM_SQR32)
} else if (y <= 32) {
if (y <= 32) {
fp_sqr_comba32(A,B);
return;
}
#endif
#if defined(TFM_SQR48)
} else if (y <= 48) {
if (y <= 48) {
fp_sqr_comba48(A,B);
return;
}
#endif
#if defined(TFM_SQR64)
} else if (y <= 64) {
if (y <= 64) {
fp_sqr_comba64(A,B);
#endif
#if !defined(TFM_SMALL_SET) && !defined(TFM_HUGE)
{
#else
} else {
#endif
fp_sqr_comba(A, B);
return;
}
#endif
fp_sqr_comba(A, B);
} else {
/* do the karatsuba action
@ -6150,7 +6149,7 @@ asm( \
/* PPC32 */
#define COMBA_START \
#define COMBA_START
#define CLEAR_CARRY \
c0 = c1 = c2 = 0;
@ -6164,30 +6163,30 @@ asm( \
#define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI \
#define COMBA_FINI
/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
asm( \
" mullw r16,%6,%6 \n\t" \
" addc %0,%0,r16 \n\t" \
" mulhwu r16,%6,%6 \n\t" \
" adde %1,%1,r16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","%cc");
#define SQRADD(i, j) \
asm( \
" mullw 16,%6,%6 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhwu 16,%6,%6 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc");
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
asm( \
" mullw r16,%6,%7 \n\t" \
" mulhwu r17,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \
" adde %1,%1,r17 \n\t" \
" addze %2,%2 \n\t" \
" addc %0,%0,r16 \n\t" \
" adde %1,%1,r17 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","%cc");
#define SQRADD2(i, j) \
asm( \
" mullw 16,%6,%7 \n\t" \
" mulhwu 17,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" adde %1,%1,17 \n\t" \
" addze %2,%2 \n\t" \
" addc %0,%0,16 \n\t" \
" adde %1,%1,17 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc");
#define SQRADDSC(i, j) \
asm( \
@ -6196,14 +6195,14 @@ asm( \
" xor %2,%2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
#define SQRADDAC(i, j) \
asm( \
" mullw r16,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \
" mulhwu r16,%6,%7 \n\t" \
" adde %1,%1,r16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc");
#define SQRADDAC(i, j) \
asm( \
" mullw 16,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhwu 16,%6,%7 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc");
#define SQRADDDB \
asm( \

BIN
tfm.dvi

Binary file not shown.

12
tfm.h
View File

@ -372,9 +372,13 @@ void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C);
void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_HUGE
#ifdef TFM_MUL32
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL48
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL64
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C);
#endif
@ -384,9 +388,13 @@ void fp_sqr_comba(fp_int *A, fp_int *B);
void fp_sqr_comba_small(fp_int *A, fp_int *B);
#endif
#ifdef TFM_HUGE
#ifdef TFM_SQR32
void fp_sqr_comba32(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR48
void fp_sqr_comba48(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR64
void fp_sqr_comba64(fp_int *A, fp_int *B);
#endif
extern const char *fp_s_rmap;

View File

@ -1,4 +1,4 @@
This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.9.20) 31 OCT 2005 11:30
This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.11.6) 18 NOV 2005 06:14
entering extended mode
**tfm
(./tfm.tex
@ -329,4 +329,4 @@ Here is how much of TeX's memory you used:
580 hyphenation exceptions out of 1000
25i,9n,25p,195b,321s stack positions out of 1500i,500n,1500p,200000b,5000s
Output written on tfm.dvi (25 pages, 51616 bytes).
Output written on tfm.dvi (25 pages, 51624 bytes).

View File

@ -49,7 +49,7 @@
\begin{document}
\frontmatter
\pagestyle{empty}
\title{TomsFastMath User Manual \\ v0.06}
\title{TomsFastMath User Manual \\ v0.07}
\author{Tom St Denis \\ tomstdenis@gmail.com}
\maketitle
This text and library are all hereby placed in the public domain. This book has been formatted for B5