added tomsfastmath-0.06

This commit is contained in:
Tom St Denis 2005-10-31 15:32:05 +00:00 committed by Steffen Jaeckel
parent a6c4c5a261
commit 091b337fe8
20 changed files with 161 additions and 73 deletions

View File

@ -1,3 +1,9 @@
October 31st, 2005
0.06 -- fixed fp_mul() and fp_sqr() to trim digits when overflows would occur. Produces numerically imprecise results
(i.e. the lower FP_SIZE digits) but shouldn't segfault at least ;-)
-- Updated the combas so you can turn on and off specific unrolled loops at build time
-- Michael Heyman reported a bug in s_fp_sub() that was pretty substantial and a bug in fp_montgomery_calc_normalization(). Fixed.
August 1st, 2005 August 1st, 2005
0.05 -- Quick fix to the fp_invmod.c code to let it handle even moduli [required for LTC] 0.05 -- Quick fix to the fp_invmod.c code to let it handle even moduli [required for LTC]
-- Added makefile.shared to make shared objects [required for LTC] -- Added makefile.shared to make shared objects [required for LTC]

View File

@ -283,7 +283,8 @@ sqrtime:
//#else //#else
monttime: monttime:
printf("Montgomery:\n"); printf("Montgomery:\n");
for (t = 2; t <= (FP_SIZE/2)-2; t += 2) { for (t = 2; t <= (FP_SIZE/2)-4; t += 2) {
// printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
fp_zero(&a); fp_zero(&a);
for (ix = 0; ix < t; ix++) { for (ix = 0; ix < t; ix++) {
a.dp[ix] = ix | 1; a.dp[ix] = ix | 1;
@ -343,6 +344,9 @@ expttime:
return; return;
testing: testing:
fp_zero(&b); fp_zero(&c); fp_zero(&d); fp_zero(&e); fp_zero(&f); fp_zero(&a);
div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n = div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n =
sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = add_d_n = sub_d_n= mul_d_n = 0; sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = add_d_n = sub_d_n= mul_d_n = 0;

Binary file not shown.

View File

@ -18,6 +18,7 @@ void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
/* how many bits of last digit does b use */ /* how many bits of last digit does b use */
bits = fp_count_bits (b) % DIGIT_BIT; bits = fp_count_bits (b) % DIGIT_BIT;
if (!bits) bits = DIGIT_BIT;
/* compute A = B^(n-1) * 2^(bits-1) */ /* compute A = B^(n-1) * 2^(bits-1) */
if (b->used > 1) { if (b->used > 1) {

View File

@ -64,6 +64,8 @@ asm( \
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \ :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
: "%rax", "%rdx", "%cc") : "%rax", "%rdx", "%cc")
#ifdef TFM_HUGE
#define INNERMUL8 \ #define INNERMUL8 \
asm( \ asm( \
"movq 0(%5),%%rax \n\t" \ "movq 0(%5),%%rax \n\t" \
@ -157,6 +159,8 @@ asm( \
: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\ : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
: "%rax", "%rdx", "%r10", "%r11", "%cc") : "%rax", "%rdx", "%r10", "%r11", "%cc")
#endif
#define PROPCARRY \ #define PROPCARRY \
asm( \ asm( \
@ -306,6 +310,11 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
fp_digit c[FP_SIZE], *_c, *tmpm, mu; fp_digit c[FP_SIZE], *_c, *tmpm, mu;
int oldused, x, y, pa; int oldused, x, y, pa;
/* bail if too large */
if (m->used > (FP_SIZE/2)) {
return;
}
#if defined(USE_MEMSET) #if defined(USE_MEMSET)
/* now zero the buff */ /* now zero the buff */
memset(c, 0, sizeof c); memset(c, 0, sizeof c);
@ -331,7 +340,7 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
_c = c + x; _c = c + x;
tmpm = m->dp; tmpm = m->dp;
y = 0; y = 0;
#if defined(TFM_X86_64) #if defined(TFM_X86_64) && defined(TFM_HUGE)
for (; y < (pa & ~7); y += 8) { for (; y < (pa & ~7); y += 8) {
INNERMUL8; INNERMUL8;
_c += 8; _c += 8;

View File

@ -15,6 +15,12 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
int r, y, yy, s; int r, y, yy, s;
fp_int ac, bd, comp, amb, cmd, t1, t2; fp_int ac, bd, comp, amb, cmd, t1, t2;
/* call generic if we're out of range */
if (A->used + B->used > FP_SIZE) {
fp_mul_comba(A, B, C);
return ;
}
y = MAX(A->used, B->used); y = MAX(A->used, B->used);
yy = MIN(A->used, B->used); yy = MIN(A->used, B->used);
if (yy <= 8 || y <= 64) { if (yy <= 8 || y <= 64) {
@ -31,11 +37,15 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
#elif defined(TFM_HUGE) #elif defined(TFM_HUGE)
if (0) { 1; if (0) { 1;
#endif #endif
#if defined(TFM_HUGE) #if defined(TFM_MUL32)
} else if (y <= 32) { } else if (y <= 32) {
fp_mul_comba32(A,B,C); fp_mul_comba32(A,B,C);
#endif
#if defined(TFM_MUL48)
} else if (y <= 48) { } else if (y <= 48) {
fp_mul_comba48(A,B,C); fp_mul_comba48(A,B,C);
#endif
#if defined(TFM_MUL64)
} else if (y <= 64) { } else if (y <= 64) {
fp_mul_comba64(A,B,C); fp_mul_comba64(A,B,C);
#endif #endif

View File

@ -266,8 +266,8 @@ void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI; COMBA_FINI;
dst->used = pa; dst->used = pa;
dst->sign = A->sign ^ B->sign;
fp_clamp(dst); fp_clamp(dst);
dst->sign = dst->used ? A->sign ^ B->sign : FP_ZPOS;
fp_copy(dst, C); fp_copy(dst, C);
} }
@ -1497,8 +1497,7 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
#endif #endif
#ifdef TFM_HUGE #ifdef TFM_MUL32
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
{ {
fp_digit c0, c1, c2, at[64]; fp_digit c0, c1, c2, at[64];
@ -1765,7 +1764,9 @@ void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
fp_clamp(C); fp_clamp(C);
COMBA_FINI; COMBA_FINI;
} }
#endif
#ifdef TFM_MUL64
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C)
{ {
fp_digit c0, c1, c2, at[128]; fp_digit c0, c1, c2, at[128];
@ -2288,7 +2289,9 @@ void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C)
fp_clamp(C); fp_clamp(C);
COMBA_FINI; COMBA_FINI;
} }
#endif
#ifdef TFM_MUL48
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C)
{ {
fp_digit c0, c1, c2, at[96]; fp_digit c0, c1, c2, at[96];
@ -2683,8 +2686,6 @@ void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C)
fp_clamp(C); fp_clamp(C);
COMBA_FINI; COMBA_FINI;
} }
#endif #endif

View File

@ -13,7 +13,7 @@ void fp_set(fp_int *a, fp_digit b)
{ {
fp_zero(a); fp_zero(a);
a->dp[0] = b; a->dp[0] = b;
a->used = b ? 1 : 0; a->used = a->dp[0] ? 1 : 0;
} }
/* $Source$ */ /* $Source$ */

View File

@ -15,6 +15,12 @@ void fp_sqr(fp_int *A, fp_int *B)
int r, y, s; int r, y, s;
fp_int aa, bb, comp, amb, t1; fp_int aa, bb, comp, amb, t1;
/* call generic if we're out of range */
if (A->used + A->used > FP_SIZE) {
fp_sqr_comba(A, B);
return ;
}
y = A->used; y = A->used;
if (y <= 64) { if (y <= 64) {
@ -24,11 +30,15 @@ void fp_sqr(fp_int *A, fp_int *B)
#elif defined(TFM_HUGE) #elif defined(TFM_HUGE)
if (0) { 1; if (0) { 1;
#endif #endif
#if defined(TFM_HUGE) #if defined(TFM_SQR32)
} else if (y <= 32) { } else if (y <= 32) {
fp_sqr_comba32(A,B); fp_sqr_comba32(A,B);
#endif
#if defined(TFM_SQR48)
} else if (y <= 48) { } else if (y <= 48) {
fp_sqr_comba48(A,B); fp_sqr_comba48(A,B);
#endif
#if defined(TFM_SQR64)
} else if (y <= 64) { } else if (y <= 64) {
fp_sqr_comba64(A,B); fp_sqr_comba64(A,B);
#endif #endif

View File

@ -1945,7 +1945,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
#endif /* TFM_SMALL_SET */ #endif /* TFM_SMALL_SET */
#ifdef TFM_HUGE #ifdef TFM_SQR32
void fp_sqr_comba32(fp_int *A, fp_int *B) void fp_sqr_comba32(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
@ -2272,16 +2272,14 @@ void fp_sqr_comba32(fp_int *A, fp_int *B)
COMBA_STORE2(b[63]); COMBA_STORE2(b[63]);
COMBA_FINI; COMBA_FINI;
memcpy(B->dp, b, 64 * sizeof(fp_digit));
B->used = 64; B->used = 64;
B->sign = FP_ZPOS; B->sign = FP_ZPOS;
memcpy(B->dp, b, 64 * sizeof(fp_digit));
fp_clamp(B); fp_clamp(B);
} }
#endif #endif
#ifdef TFM_HUGE #ifdef TFM_SQR64
void fp_sqr_comba64(fp_int *A, fp_int *B) void fp_sqr_comba64(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[128], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[128], c0, c1, c2, sc0, sc1, sc2;
@ -2933,7 +2931,9 @@ void fp_sqr_comba64(fp_int *A, fp_int *B)
memcpy(B->dp, b, 128 * sizeof(fp_digit)); memcpy(B->dp, b, 128 * sizeof(fp_digit));
fp_clamp(B); fp_clamp(B);
} }
#endif
#ifdef TFM_SQR48
void fp_sqr_comba48(fp_int *A, fp_int *B) void fp_sqr_comba48(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[96], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[96], c0, c1, c2, sc0, sc1, sc2;
@ -3420,9 +3420,9 @@ void fp_sqr_comba48(fp_int *A, fp_int *B)
COMBA_STORE2(b[95]); COMBA_STORE2(b[95]);
COMBA_FINI; COMBA_FINI;
memcpy(B->dp, b, 96 * sizeof(fp_digit));
B->used = 96; B->used = 96;
B->sign = FP_ZPOS; B->sign = FP_ZPOS;
memcpy(B->dp, b, 96 * sizeof(fp_digit));
fp_clamp(B); fp_clamp(B);
} }

View File

@ -1,7 +1,7 @@
#makefile for TomsFastMath #makefile for TomsFastMath
# #
# #
VERSION=0.05 VERSION=0.06
CFLAGS += -Wall -W -Wshadow -I./ CFLAGS += -Wall -W -Wshadow -I./
@ -85,7 +85,7 @@ install: $(LIBNAME)
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH) install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
mtest/mtest: mtest/mtest.c mtest/mtest: mtest/mtest.c
cd mtest ; make mtest cd mtest ; CFLAGS="$(CFLAGS) -I../" make mtest
test: $(LIBNAME) demo/test.o mtest/mtest test: $(LIBNAME) demo/test.o mtest/mtest
$(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test $(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test
@ -143,5 +143,5 @@ zipup: no_oops docs clean
zip -9r tfm-$(VERSION).zip tomsfastmath-$(VERSION)/* zip -9r tfm-$(VERSION).zip tomsfastmath-$(VERSION)/*
# $Source: /cvs/libtom/tomsfastmath/makefile,v $ # $Source: /cvs/libtom/tomsfastmath/makefile,v $
# $Revision: 1.17 $ # $Revision: 1.19 $
# $Date: 2005/07/30 04:23:55 $ # $Date: 2005/08/25 23:53:40 $

View File

@ -1,6 +1,7 @@
#makefile for TomsFastMath #makefile for TomsFastMath
# #
# #
VERSION=0:6
CC=libtool --mode=compile gcc CC=libtool --mode=compile gcc
@ -19,7 +20,6 @@ CFLAGS += -fomit-frame-pointer
endif endif
VERSION=0:5
OBJECTS = \ OBJECTS = \
fp_set.o \ fp_set.o \
@ -81,12 +81,12 @@ endif
default: $(LIBNAME) default: $(LIBNAME)
objs: $(OBJECTS)
$(LIBNAME): $(OBJECTS) $(LIBNAME): $(OBJECTS)
libtool --silent --mode=link gcc $(CFLAGS) `find . -type f | grep "[.]lo" | xargs` -o $(LIBNAME) -rpath $(LIBPATH) -version-info $(VERSION)
install: $(LIBNAME) install: $(LIBNAME)
libtool --silent --mode=link gcc $(CFLAGS) `find . -type f | grep "[.]lo" | xargs` -o $(LIBNAME) -rpath $(LIBPATH) -version-info $(VERSION)
libtool --silent --mode=link gcc $(CFLAGS) `find . -type f | grep "[.]o" | xargs` -o $(LIBNAME_S)
ranlib $(LIBNAME_S)
libtool --silent --mode=install install -c $(LIBNAME) $(LIBPATH)/$(LIBNAME) libtool --silent --mode=install install -c $(LIBNAME) $(LIBPATH)/$(LIBNAME)
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH) install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH)
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH) install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
@ -104,6 +104,6 @@ stest: $(LIBNAME) demo/stest.o
$(CC) $(CFLAGS) demo/stest.o $(LIBNAME_S) -o stest $(CC) $(CFLAGS) demo/stest.o $(LIBNAME_S) -o stest
# $Source: /cvs/libtom/tomsfastmath/makefile.shared,v $ # $Source: /cvs/libtom/tomsfastmath/makefile.shared,v $
# $Revision: 1.4 $ # $Revision: 1.7 $
# $Date: 2005/07/28 03:08:35 $ # $Date: 2005/10/06 23:31:17 $

View File

@ -38,6 +38,8 @@ mulmod
#include <stdlib.h> #include <stdlib.h>
#include <time.h> #include <time.h>
#include <tommath.h> #include <tommath.h>
#define CRYPT
#include "../tfm.h"
FILE *rng; FILE *rng;
@ -47,7 +49,7 @@ void rand_num(mp_int *a)
int n, size; int n, size;
unsigned char buf[2048]; unsigned char buf[2048];
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 256; size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % (FP_MAX_SIZE/16 - DIGIT_BIT/2);
buf[0] = (fgetc(rng)&1)?1:0; buf[0] = (fgetc(rng)&1)?1:0;
fread(buf+1, 1, size, rng); fread(buf+1, 1, size, rng);
while (buf[1] == 0) buf[1] = fgetc(rng); while (buf[1] == 0) buf[1] = fgetc(rng);
@ -60,7 +62,7 @@ void rand_num2(mp_int *a)
int n, size; int n, size;
unsigned char buf[2048]; unsigned char buf[2048];
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 256; size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % (FP_MAX_SIZE/16 - DIGIT_BIT/2);
buf[0] = (fgetc(rng)&1)?1:0; buf[0] = (fgetc(rng)&1)?1:0;
fread(buf+1, 1, size, rng); fread(buf+1, 1, size, rng);
while (buf[1] == 0) buf[1] = fgetc(rng); while (buf[1] == 0) buf[1] = fgetc(rng);
@ -118,7 +120,6 @@ int main(void)
} }
#endif #endif
n = fgetc(rng) % 16; n = fgetc(rng) % 16;
if (n == 0) { if (n == 0) {
/* add tests */ /* add tests */
rand_num(&a); rand_num(&a);

View File

@ -1663,6 +1663,7 @@ void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
/* how many bits of last digit does b use */ /* how many bits of last digit does b use */
bits = fp_count_bits (b) % DIGIT_BIT; bits = fp_count_bits (b) % DIGIT_BIT;
if (!bits) bits = DIGIT_BIT;
/* compute A = B^(n-1) * 2^(bits-1) */ /* compute A = B^(n-1) * 2^(bits-1) */
if (b->used > 1) { if (b->used > 1) {
@ -1755,6 +1756,8 @@ asm( \
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \ :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
: "%rax", "%rdx", "%cc") : "%rax", "%rdx", "%cc")
#ifdef TFM_HUGE
#define INNERMUL8 \ #define INNERMUL8 \
asm( \ asm( \
"movq 0(%5),%%rax \n\t" \ "movq 0(%5),%%rax \n\t" \
@ -1848,6 +1851,8 @@ asm( \
: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\ : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
: "%rax", "%rdx", "%r10", "%r11", "%cc") : "%rax", "%rdx", "%r10", "%r11", "%cc")
#endif
#define PROPCARRY \ #define PROPCARRY \
asm( \ asm( \
@ -1997,6 +2002,11 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
fp_digit c[FP_SIZE], *_c, *tmpm, mu; fp_digit c[FP_SIZE], *_c, *tmpm, mu;
int oldused, x, y, pa; int oldused, x, y, pa;
/* bail if too large */
if (m->used > (FP_SIZE/2)) {
return;
}
#if defined(USE_MEMSET) #if defined(USE_MEMSET)
/* now zero the buff */ /* now zero the buff */
memset(c, 0, sizeof c); memset(c, 0, sizeof c);
@ -2022,7 +2032,7 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
_c = c + x; _c = c + x;
tmpm = m->dp; tmpm = m->dp;
y = 0; y = 0;
#if defined(TFM_X86_64) #if defined(TFM_X86_64) && defined(TFM_HUGE)
for (; y < (pa & ~7); y += 8) { for (; y < (pa & ~7); y += 8) {
INNERMUL8; INNERMUL8;
_c += 8; _c += 8;
@ -2140,6 +2150,12 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
int r, y, yy, s; int r, y, yy, s;
fp_int ac, bd, comp, amb, cmd, t1, t2; fp_int ac, bd, comp, amb, cmd, t1, t2;
/* call generic if we're out of range */
if (A->used + B->used > FP_SIZE) {
fp_mul_comba(A, B, C);
return ;
}
y = MAX(A->used, B->used); y = MAX(A->used, B->used);
yy = MIN(A->used, B->used); yy = MIN(A->used, B->used);
if (yy <= 8 || y <= 64) { if (yy <= 8 || y <= 64) {
@ -2156,11 +2172,15 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
#elif defined(TFM_HUGE) #elif defined(TFM_HUGE)
if (0) { 1; if (0) { 1;
#endif #endif
#if defined(TFM_HUGE) #if defined(TFM_MUL32)
} else if (y <= 32) { } else if (y <= 32) {
fp_mul_comba32(A,B,C); fp_mul_comba32(A,B,C);
#endif
#if defined(TFM_MUL48)
} else if (y <= 48) { } else if (y <= 48) {
fp_mul_comba48(A,B,C); fp_mul_comba48(A,B,C);
#endif
#if defined(TFM_MUL64)
} else if (y <= 64) { } else if (y <= 64) {
fp_mul_comba64(A,B,C); fp_mul_comba64(A,B,C);
#endif #endif
@ -2663,8 +2683,8 @@ void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI; COMBA_FINI;
dst->used = pa; dst->used = pa;
dst->sign = A->sign ^ B->sign;
fp_clamp(dst); fp_clamp(dst);
dst->sign = dst->used ? A->sign ^ B->sign : FP_ZPOS;
fp_copy(dst, C); fp_copy(dst, C);
} }
@ -3894,8 +3914,7 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
#endif #endif
#ifdef TFM_HUGE #ifdef TFM_MUL32
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
{ {
fp_digit c0, c1, c2, at[64]; fp_digit c0, c1, c2, at[64];
@ -4162,7 +4181,9 @@ void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
fp_clamp(C); fp_clamp(C);
COMBA_FINI; COMBA_FINI;
} }
#endif
#ifdef TFM_MUL64
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C)
{ {
fp_digit c0, c1, c2, at[128]; fp_digit c0, c1, c2, at[128];
@ -4685,7 +4706,9 @@ void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C)
fp_clamp(C); fp_clamp(C);
COMBA_FINI; COMBA_FINI;
} }
#endif
#ifdef TFM_MUL48
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C)
{ {
fp_digit c0, c1, c2, at[96]; fp_digit c0, c1, c2, at[96];
@ -5080,8 +5103,6 @@ void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C)
fp_clamp(C); fp_clamp(C);
COMBA_FINI; COMBA_FINI;
} }
#endif #endif
@ -5657,7 +5678,7 @@ void fp_set(fp_int *a, fp_digit b)
{ {
fp_zero(a); fp_zero(a);
a->dp[0] = b; a->dp[0] = b;
a->used = b ? 1 : 0; a->used = a->dp[0] ? 1 : 0;
} }
/* $Source$ */ /* $Source$ */
@ -5707,6 +5728,12 @@ void fp_sqr(fp_int *A, fp_int *B)
int r, y, s; int r, y, s;
fp_int aa, bb, comp, amb, t1; fp_int aa, bb, comp, amb, t1;
/* call generic if we're out of range */
if (A->used + A->used > FP_SIZE) {
fp_sqr_comba(A, B);
return ;
}
y = A->used; y = A->used;
if (y <= 64) { if (y <= 64) {
@ -5716,11 +5743,15 @@ void fp_sqr(fp_int *A, fp_int *B)
#elif defined(TFM_HUGE) #elif defined(TFM_HUGE)
if (0) { 1; if (0) { 1;
#endif #endif
#if defined(TFM_HUGE) #if defined(TFM_SQR32)
} else if (y <= 32) { } else if (y <= 32) {
fp_sqr_comba32(A,B); fp_sqr_comba32(A,B);
#endif
#if defined(TFM_SQR48)
} else if (y <= 48) { } else if (y <= 48) {
fp_sqr_comba48(A,B); fp_sqr_comba48(A,B);
#endif
#if defined(TFM_SQR64)
} else if (y <= 64) { } else if (y <= 64) {
fp_sqr_comba64(A,B); fp_sqr_comba64(A,B);
#endif #endif
@ -7761,7 +7792,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
#endif /* TFM_SMALL_SET */ #endif /* TFM_SMALL_SET */
#ifdef TFM_HUGE #ifdef TFM_SQR32
void fp_sqr_comba32(fp_int *A, fp_int *B) void fp_sqr_comba32(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
@ -8088,16 +8119,14 @@ void fp_sqr_comba32(fp_int *A, fp_int *B)
COMBA_STORE2(b[63]); COMBA_STORE2(b[63]);
COMBA_FINI; COMBA_FINI;
memcpy(B->dp, b, 64 * sizeof(fp_digit));
B->used = 64; B->used = 64;
B->sign = FP_ZPOS; B->sign = FP_ZPOS;
memcpy(B->dp, b, 64 * sizeof(fp_digit));
fp_clamp(B); fp_clamp(B);
} }
#endif #endif
#ifdef TFM_HUGE #ifdef TFM_SQR64
void fp_sqr_comba64(fp_int *A, fp_int *B) void fp_sqr_comba64(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[128], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[128], c0, c1, c2, sc0, sc1, sc2;
@ -8749,7 +8778,9 @@ void fp_sqr_comba64(fp_int *A, fp_int *B)
memcpy(B->dp, b, 128 * sizeof(fp_digit)); memcpy(B->dp, b, 128 * sizeof(fp_digit));
fp_clamp(B); fp_clamp(B);
} }
#endif
#ifdef TFM_SQR48
void fp_sqr_comba48(fp_int *A, fp_int *B) void fp_sqr_comba48(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[96], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[96], c0, c1, c2, sc0, sc1, sc2;
@ -9236,9 +9267,9 @@ void fp_sqr_comba48(fp_int *A, fp_int *B)
COMBA_STORE2(b[95]); COMBA_STORE2(b[95]);
COMBA_FINI; COMBA_FINI;
memcpy(B->dp, b, 96 * sizeof(fp_digit));
B->used = 96; B->used = 96;
B->sign = FP_ZPOS; B->sign = FP_ZPOS;
memcpy(B->dp, b, 96 * sizeof(fp_digit));
fp_clamp(B); fp_clamp(B);
} }
@ -9652,11 +9683,11 @@ void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
c->dp[x] = (fp_digit)t; c->dp[x] = (fp_digit)t;
t >>= DIGIT_BIT; t >>= DIGIT_BIT;
} }
if (t != 0 && x != FP_SIZE) { if (t != 0 && x < FP_SIZE) {
c->dp[c->used++] = (fp_digit)t; c->dp[c->used++] = (fp_digit)t;
++x; ++x;
} }
c->used = x;
for (; x < oldused; x++) { for (; x < oldused; x++) {
c->dp[x] = 0; c->dp[x] = 0;
} }
@ -9684,18 +9715,23 @@ void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
/* unsigned subtraction ||a|| >= ||b|| ALWAYS! */ /* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
void s_fp_sub(fp_int *a, fp_int *b, fp_int *c) void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
{ {
int x, oldused; int x, oldbused, oldused;
fp_word t; fp_word t;
oldused = c->used; oldused = c->used;
oldbused = b->used;
c->used = a->used; c->used = a->used;
t = 0; t = 0;
for (x = 0; x < a->used; x++) { for (x = 0; x < oldbused; x++) {
t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t); t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t);
c->dp[x] = (fp_digit)t; c->dp[x] = (fp_digit)t;
t = (t >> DIGIT_BIT) & 1; t = (t >> DIGIT_BIT)&1;
}
for (; x < a->used; x++) {
t = ((fp_word)a->dp[x]) - t;
c->dp[x] = (fp_digit)t;
t = (t >> DIGIT_BIT);
} }
for (; x < oldused; x++) { for (; x < oldused; x++) {
c->dp[x] = 0; c->dp[x] = 0;
} }

View File

@ -25,11 +25,11 @@ void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
c->dp[x] = (fp_digit)t; c->dp[x] = (fp_digit)t;
t >>= DIGIT_BIT; t >>= DIGIT_BIT;
} }
if (t != 0 && x != FP_SIZE) { if (t != 0 && x < FP_SIZE) {
c->dp[c->used++] = (fp_digit)t; c->dp[c->used++] = (fp_digit)t;
++x; ++x;
} }
c->used = x;
for (; x < oldused; x++) { for (; x < oldused; x++) {
c->dp[x] = 0; c->dp[x] = 0;
} }

View File

@ -12,18 +12,23 @@
/* unsigned subtraction ||a|| >= ||b|| ALWAYS! */ /* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
void s_fp_sub(fp_int *a, fp_int *b, fp_int *c) void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
{ {
int x, oldused; int x, oldbused, oldused;
fp_word t; fp_word t;
oldused = c->used; oldused = c->used;
oldbused = b->used;
c->used = a->used; c->used = a->used;
t = 0; t = 0;
for (x = 0; x < a->used; x++) { for (x = 0; x < oldbused; x++) {
t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t); t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t);
c->dp[x] = (fp_digit)t; c->dp[x] = (fp_digit)t;
t = (t >> DIGIT_BIT) & 1; t = (t >> DIGIT_BIT)&1;
}
for (; x < a->used; x++) {
t = ((fp_word)a->dp[x]) - t;
c->dp[x] = (fp_digit)t;
t = (t >> DIGIT_BIT);
} }
for (; x < oldused; x++) { for (; x < oldused; x++) {
c->dp[x] = 0; c->dp[x] = 0;
} }

BIN
tfm.dvi

Binary file not shown.

9
tfm.h
View File

@ -37,7 +37,12 @@
Enable these if you are doing 32, 48 or 64 digit multiplications (useful for RSA) Enable these if you are doing 32, 48 or 64 digit multiplications (useful for RSA)
Less important on 64-bit machines as 32 digits == 2048 bits Less important on 64-bit machines as 32 digits == 2048 bits
*/ */
#define TFM_HUGE #define TFM_MUL32
#define TFM_MUL48
#define TFM_MUL64
#define TFM_SQR32
#define TFM_SQR48
#define TFM_SQR64
/* do we want some overflow checks /* do we want some overflow checks
Not required if you make sure your numbers are within range (e.g. by default a modulus for fp_exptmod() can only be upto 2048 bits long) Not required if you make sure your numbers are within range (e.g. by default a modulus for fp_exptmod() can only be upto 2048 bits long)
@ -61,7 +66,7 @@
* You can externally define this or it defaults to 4096-bits [allowing multiplications upto 2048x2048 bits ] * You can externally define this or it defaults to 4096-bits [allowing multiplications upto 2048x2048 bits ]
*/ */
#ifndef FP_MAX_SIZE #ifndef FP_MAX_SIZE
#define FP_MAX_SIZE (4096+(4*DIGIT_BIT)) #define FP_MAX_SIZE (4096+(8*DIGIT_BIT))
#endif #endif
/* will this lib work? */ /* will this lib work? */

View File

@ -1,4 +1,4 @@
This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.4.10) 1 AUG 2005 13:34 This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.9.20) 31 OCT 2005 11:30
entering extended mode entering extended mode
**tfm **tfm
(./tfm.tex (./tfm.tex
@ -329,4 +329,4 @@ Here is how much of TeX's memory you used:
580 hyphenation exceptions out of 1000 580 hyphenation exceptions out of 1000
25i,9n,25p,195b,321s stack positions out of 1500i,500n,1500p,200000b,5000s 25i,9n,25p,195b,321s stack positions out of 1500i,500n,1500p,200000b,5000s
Output written on tfm.dvi (25 pages, 51612 bytes). Output written on tfm.dvi (25 pages, 51616 bytes).

View File

@ -49,7 +49,7 @@
\begin{document} \begin{document}
\frontmatter \frontmatter
\pagestyle{empty} \pagestyle{empty}
\title{TomsFastMath User Manual \\ v0.05} \title{TomsFastMath User Manual \\ v0.06}
\author{Tom St Denis \\ tomstdenis@gmail.com} \author{Tom St Denis \\ tomstdenis@gmail.com}
\maketitle \maketitle
This text and library are all hereby placed in the public domain. This book has been formatted for B5 This text and library are all hereby placed in the public domain. This book has been formatted for B5