diff --git a/changes.txt b/changes.txt index 525e2d4..4fab275 100644 --- a/changes.txt +++ b/changes.txt @@ -1,3 +1,9 @@ +October 31st, 2005 +0.06 -- fixed fp_mul() and fp_sqr() to trim digits when overflows would occur. Produces numerically inprecise results + (e.g. the lower FP_SIZE digits) but shouldn't segfault at least ;-) + -- Updated the combas so you can turn on and off specific unrolled loops at build time + -- Michael Heyman reported a bug in s_fp_sub() that was pretty substantial and a bug in fp_montgomery_calc_normalization(). Fixed. + August 1st, 2005 0.05 -- Quick fix to the fp_invmod.c code to let it handle even moduli [required for LTC] -- Added makefile.shared to make shared objects [required for LTC] diff --git a/demo/test.c b/demo/test.c index bf1c288..d79759c 100644 --- a/demo/test.c +++ b/demo/test.c @@ -283,7 +283,8 @@ sqrtime: //#else monttime: printf("Montgomery:\n"); - for (t = 2; t <= (FP_SIZE/2)-2; t += 2) { + for (t = 2; t <= (FP_SIZE/2)-4; t += 2) { +// printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2); fp_zero(&a); for (ix = 0; ix < t; ix++) { a.dp[ix] = ix | 1; @@ -343,6 +344,9 @@ expttime: return; testing: + fp_zero(&b); fp_zero(&c); fp_zero(&d); fp_zero(&e); fp_zero(&f); fp_zero(&a); + + div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n = sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = add_d_n = sub_d_n= mul_d_n = 0; diff --git a/doc/tfm.pdf b/doc/tfm.pdf index a85837d..1f6b759 100644 Binary files a/doc/tfm.pdf and b/doc/tfm.pdf differ diff --git a/fp_montgomery_calc_normalization.c b/fp_montgomery_calc_normalization.c index 1ac28b7..2949f2b 100644 --- a/fp_montgomery_calc_normalization.c +++ b/fp_montgomery_calc_normalization.c @@ -18,6 +18,7 @@ void fp_montgomery_calc_normalization(fp_int *a, fp_int *b) /* how many bits of last digit does b use */ bits = fp_count_bits (b) % DIGIT_BIT; + if (!bits) bits = DIGIT_BIT; /* compute A = B^(n-1) * 2^(bits-1) */ if (b->used > 1) { diff --git a/fp_montgomery_reduce.c b/fp_montgomery_reduce.c index 53d6773..b0c2305 100644 --- a/fp_montgomery_reduce.c +++ b/fp_montgomery_reduce.c @@ -64,6 +64,8 @@ asm( \ :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \ : "%rax", "%rdx", "%cc") +#ifdef TFM_HUGE + #define INNERMUL8 \ asm( \ "movq 0(%5),%%rax \n\t" \ @@ -157,6 +159,8 @@ asm( \ : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\ : "%rax", "%rdx", "%r10", "%r11", "%cc") +#endif + #define PROPCARRY \ asm( \ @@ -306,6 +310,11 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp) fp_digit c[FP_SIZE], *_c, *tmpm, mu; int oldused, x, y, pa; + /* bail if too large */ + if (m->used > (FP_SIZE/2)) { + return; + } + #if defined(USE_MEMSET) /* now zero the buff */ memset(c, 0, sizeof c); @@ -331,7 +340,7 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp) _c = c + x; tmpm = m->dp; y = 0; - #if defined(TFM_X86_64) + #if defined(TFM_X86_64) && defined(TFM_HUGE) for (; y < (pa & ~7); y += 8) { INNERMUL8; _c += 8; diff --git a/fp_mul.c b/fp_mul.c index 71c5f47..a8b42da 100644 --- a/fp_mul.c +++ b/fp_mul.c @@ -15,6 +15,12 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C) int r, y, yy, s; fp_int ac, bd, comp, amb, cmd, t1, t2; + /* call generic if we're out of range */ + if (A->used + B->used > FP_SIZE) { + fp_mul_comba(A, B, C); + return ; + } + y = MAX(A->used, B->used); yy = MIN(A->used, B->used); if (yy <= 8 || y <= 64) { @@ -31,11 +37,15 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C) #elif defined(TFM_HUGE) if (0) { 1; #endif -#if defined(TFM_HUGE) +#if defined(TFM_MUL32) } else if (y <= 32) { fp_mul_comba32(A,B,C); +#endif +#if defined(TFM_MUL48) } else if (y <= 48) { fp_mul_comba48(A,B,C); +#endif +#if defined(TFM_MUL64) } else if (y <= 64) { fp_mul_comba64(A,B,C); #endif diff --git a/fp_mul_comba.c b/fp_mul_comba.c index cf51eba..8aa99cc 100644 --- a/fp_mul_comba.c +++ b/fp_mul_comba.c @@ -47,7 +47,7 @@ /* this should multiply i and j */ #define MULADD(i, j) \ -asm( \ +asm( \ "movl %6,%%eax \n\t" \ "mull %7 \n\t" \ "addl %%eax,%0 \n\t" \ @@ -266,8 +266,8 @@ void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C) COMBA_FINI; dst->used = pa; + dst->sign = A->sign ^ B->sign; fp_clamp(dst); - dst->sign = dst->used ? A->sign ^ B->sign : FP_ZPOS; fp_copy(dst, C); } @@ -1497,8 +1497,7 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C) #endif -#ifdef TFM_HUGE - +#ifdef TFM_MUL32 void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C) { fp_digit c0, c1, c2, at[64]; @@ -1765,7 +1764,9 @@ void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C) fp_clamp(C); COMBA_FINI; } +#endif +#ifdef TFM_MUL64 void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C) { fp_digit c0, c1, c2, at[128]; @@ -2288,7 +2289,9 @@ void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C) fp_clamp(C); COMBA_FINI; } +#endif +#ifdef TFM_MUL48 void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C) { fp_digit c0, c1, c2, at[96]; @@ -2683,8 +2686,6 @@ void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C) fp_clamp(C); COMBA_FINI; } - - #endif diff --git a/fp_set.c b/fp_set.c index e8c849b..39f4cf4 100644 --- a/fp_set.c +++ b/fp_set.c @@ -13,7 +13,7 @@ void fp_set(fp_int *a, fp_digit b) { fp_zero(a); a->dp[0] = b; - a->used = b ? 1 : 0; + a->used = a->dp[0] ? 1 : 0; } /* $Source$ */ diff --git a/fp_sqr.c b/fp_sqr.c index da3c944..c95fe7b 100644 --- a/fp_sqr.c +++ b/fp_sqr.c @@ -15,6 +15,12 @@ void fp_sqr(fp_int *A, fp_int *B) int r, y, s; fp_int aa, bb, comp, amb, t1; + /* call generic if we're out of range */ + if (A->used + A->used > FP_SIZE) { + fp_sqr_comba(A, B); + return ; + } + y = A->used; if (y <= 64) { @@ -24,11 +30,15 @@ void fp_sqr(fp_int *A, fp_int *B) #elif defined(TFM_HUGE) if (0) { 1; #endif -#if defined(TFM_HUGE) +#if defined(TFM_SQR32) } else if (y <= 32) { fp_sqr_comba32(A,B); +#endif +#if defined(TFM_SQR48) } else if (y <= 48) { fp_sqr_comba48(A,B); +#endif +#if defined(TFM_SQR64) } else if (y <= 64) { fp_sqr_comba64(A,B); #endif diff --git a/fp_sqr_comba.c b/fp_sqr_comba.c index d5a39e5..443f910 100644 --- a/fp_sqr_comba.c +++ b/fp_sqr_comba.c @@ -1945,7 +1945,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B) #endif /* TFM_SMALL_SET */ -#ifdef TFM_HUGE +#ifdef TFM_SQR32 void fp_sqr_comba32(fp_int *A, fp_int *B) { fp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2; @@ -2272,16 +2272,14 @@ void fp_sqr_comba32(fp_int *A, fp_int *B) COMBA_STORE2(b[63]); COMBA_FINI; + memcpy(B->dp, b, 64 * sizeof(fp_digit)); B->used = 64; B->sign = FP_ZPOS; - memcpy(B->dp, b, 64 * sizeof(fp_digit)); fp_clamp(B); } - - #endif -#ifdef TFM_HUGE +#ifdef TFM_SQR64 void fp_sqr_comba64(fp_int *A, fp_int *B) { fp_digit *a, b[128], c0, c1, c2, sc0, sc1, sc2; @@ -2933,7 +2931,9 @@ void fp_sqr_comba64(fp_int *A, fp_int *B) memcpy(B->dp, b, 128 * sizeof(fp_digit)); fp_clamp(B); } +#endif +#ifdef TFM_SQR48 void fp_sqr_comba48(fp_int *A, fp_int *B) { fp_digit *a, b[96], c0, c1, c2, sc0, sc1, sc2; @@ -2985,7 +2985,7 @@ void fp_sqr_comba48(fp_int *A, fp_int *B) /* output 8 */ CARRY_FORWARD; - SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); COMBA_STORE(b[8]); /* output 9 */ @@ -3420,9 +3420,9 @@ void fp_sqr_comba48(fp_int *A, fp_int *B) COMBA_STORE2(b[95]); COMBA_FINI; + memcpy(B->dp, b, 96 * sizeof(fp_digit)); B->used = 96; B->sign = FP_ZPOS; - memcpy(B->dp, b, 96 * sizeof(fp_digit)); fp_clamp(B); } diff --git a/makefile b/makefile index a9ad6b2..b74b0bb 100644 --- a/makefile +++ b/makefile @@ -1,7 +1,7 @@ #makefile for TomsFastMath # # -VERSION=0.05 +VERSION=0.06 CFLAGS += -Wall -W -Wshadow -I./ @@ -85,7 +85,7 @@ install: $(LIBNAME) install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH) mtest/mtest: mtest/mtest.c - cd mtest ; make mtest + cd mtest ; CFLAGS="$(CFLAGS) -I../" make mtest test: $(LIBNAME) demo/test.o mtest/mtest $(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test @@ -143,5 +143,5 @@ zipup: no_oops docs clean zip -9r tfm-$(VERSION).zip tomsfastmath-$(VERSION)/* # $Source: /cvs/libtom/tomsfastmath/makefile,v $ -# $Revision: 1.17 $ -# $Date: 2005/07/30 04:23:55 $ +# $Revision: 1.19 $ +# $Date: 2005/08/25 23:53:40 $ diff --git a/makefile.shared b/makefile.shared index 74fa873..2ccac42 100644 --- a/makefile.shared +++ b/makefile.shared @@ -1,6 +1,7 @@ #makefile for TomsFastMath # # +VERSION=0:6 CC=libtool --mode=compile gcc @@ -19,7 +20,6 @@ CFLAGS += -fomit-frame-pointer endif -VERSION=0:5 OBJECTS = \ fp_set.o \ @@ -81,12 +81,12 @@ endif default: $(LIBNAME) +objs: $(OBJECTS) + $(LIBNAME): $(OBJECTS) + libtool --silent --mode=link gcc $(CFLAGS) `find . -type f | grep "[.]lo" | xargs` -o $(LIBNAME) -rpath $(LIBPATH) -version-info $(VERSION) install: $(LIBNAME) - libtool --silent --mode=link gcc $(CFLAGS) `find . -type f | grep "[.]lo" | xargs` -o $(LIBNAME) -rpath $(LIBPATH) -version-info $(VERSION) - libtool --silent --mode=link gcc $(CFLAGS) `find . -type f | grep "[.]o" | xargs` -o $(LIBNAME_S) - ranlib $(LIBNAME_S) libtool --silent --mode=install install -c $(LIBNAME) $(LIBPATH)/$(LIBNAME) install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH) install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH) @@ -104,6 +104,6 @@ stest: $(LIBNAME) demo/stest.o $(CC) $(CFLAGS) demo/stest.o $(LIBNAME_S) -o stest # $Source: /cvs/libtom/tomsfastmath/makefile.shared,v $ -# $Revision: 1.4 $ -# $Date: 2005/07/28 03:08:35 $ +# $Revision: 1.7 $ +# $Date: 2005/10/06 23:31:17 $ diff --git a/mtest/mtest.c b/mtest/mtest.c index 02fd483..66a2621 100644 --- a/mtest/mtest.c +++ b/mtest/mtest.c @@ -38,6 +38,8 @@ mulmod #include #include #include +#define CRYPT +#include "../tfm.h" FILE *rng; @@ -47,7 +49,7 @@ void rand_num(mp_int *a) int n, size; unsigned char buf[2048]; - size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 256; + size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % (FP_MAX_SIZE/16 - DIGIT_BIT/2); buf[0] = (fgetc(rng)&1)?1:0; fread(buf+1, 1, size, rng); while (buf[1] == 0) buf[1] = fgetc(rng); @@ -60,7 +62,7 @@ void rand_num2(mp_int *a) int n, size; unsigned char buf[2048]; - size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 256; + size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % (FP_MAX_SIZE/16 - DIGIT_BIT/2); buf[0] = (fgetc(rng)&1)?1:0; fread(buf+1, 1, size, rng); while (buf[1] == 0) buf[1] = fgetc(rng); @@ -118,7 +120,6 @@ int main(void) } #endif n = fgetc(rng) % 16; - if (n == 0) { /* add tests */ rand_num(&a); diff --git a/pre_gen/mpi.c b/pre_gen/mpi.c index 132c1e2..4a8a033 100644 --- a/pre_gen/mpi.c +++ b/pre_gen/mpi.c @@ -1663,6 +1663,7 @@ void fp_montgomery_calc_normalization(fp_int *a, fp_int *b) /* how many bits of last digit does b use */ bits = fp_count_bits (b) % DIGIT_BIT; + if (!bits) bits = DIGIT_BIT; /* compute A = B^(n-1) * 2^(bits-1) */ if (b->used > 1) { @@ -1755,6 +1756,8 @@ asm( \ :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \ : "%rax", "%rdx", "%cc") +#ifdef TFM_HUGE + #define INNERMUL8 \ asm( \ "movq 0(%5),%%rax \n\t" \ @@ -1848,6 +1851,8 @@ asm( \ : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\ : "%rax", "%rdx", "%r10", "%r11", "%cc") +#endif + #define PROPCARRY \ asm( \ @@ -1997,6 +2002,11 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp) fp_digit c[FP_SIZE], *_c, *tmpm, mu; int oldused, x, y, pa; + /* bail if too large */ + if (m->used > (FP_SIZE/2)) { + return; + } + #if defined(USE_MEMSET) /* now zero the buff */ memset(c, 0, sizeof c); @@ -2022,7 +2032,7 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp) _c = c + x; tmpm = m->dp; y = 0; - #if defined(TFM_X86_64) + #if defined(TFM_X86_64) && defined(TFM_HUGE) for (; y < (pa & ~7); y += 8) { INNERMUL8; _c += 8; @@ -2140,6 +2150,12 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C) int r, y, yy, s; fp_int ac, bd, comp, amb, cmd, t1, t2; + /* call generic if we're out of range */ + if (A->used + B->used > FP_SIZE) { + fp_mul_comba(A, B, C); + return ; + } + y = MAX(A->used, B->used); yy = MIN(A->used, B->used); if (yy <= 8 || y <= 64) { @@ -2156,11 +2172,15 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C) #elif defined(TFM_HUGE) if (0) { 1; #endif -#if defined(TFM_HUGE) +#if defined(TFM_MUL32) } else if (y <= 32) { fp_mul_comba32(A,B,C); +#endif +#if defined(TFM_MUL48) } else if (y <= 48) { fp_mul_comba48(A,B,C); +#endif +#if defined(TFM_MUL64) } else if (y <= 64) { fp_mul_comba64(A,B,C); #endif @@ -2444,7 +2464,7 @@ void fp_mul_2d(fp_int *a, int b, fp_int *c) /* this should multiply i and j */ #define MULADD(i, j) \ -asm( \ +asm( \ "movl %6,%%eax \n\t" \ "mull %7 \n\t" \ "addl %%eax,%0 \n\t" \ @@ -2663,8 +2683,8 @@ void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C) COMBA_FINI; dst->used = pa; + dst->sign = A->sign ^ B->sign; fp_clamp(dst); - dst->sign = dst->used ? A->sign ^ B->sign : FP_ZPOS; fp_copy(dst, C); } @@ -3894,8 +3914,7 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C) #endif -#ifdef TFM_HUGE - +#ifdef TFM_MUL32 void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C) { fp_digit c0, c1, c2, at[64]; @@ -4162,7 +4181,9 @@ void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C) fp_clamp(C); COMBA_FINI; } +#endif +#ifdef TFM_MUL64 void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C) { fp_digit c0, c1, c2, at[128]; @@ -4685,7 +4706,9 @@ void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C) fp_clamp(C); COMBA_FINI; } +#endif +#ifdef TFM_MUL48 void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C) { fp_digit c0, c1, c2, at[96]; @@ -5080,8 +5103,6 @@ void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C) fp_clamp(C); COMBA_FINI; } - - #endif @@ -5657,7 +5678,7 @@ void fp_set(fp_int *a, fp_digit b) { fp_zero(a); a->dp[0] = b; - a->used = b ? 1 : 0; + a->used = a->dp[0] ? 1 : 0; } /* $Source$ */ @@ -5707,6 +5728,12 @@ void fp_sqr(fp_int *A, fp_int *B) int r, y, s; fp_int aa, bb, comp, amb, t1; + /* call generic if we're out of range */ + if (A->used + A->used > FP_SIZE) { + fp_sqr_comba(A, B); + return ; + } + y = A->used; if (y <= 64) { @@ -5716,11 +5743,15 @@ void fp_sqr(fp_int *A, fp_int *B) #elif defined(TFM_HUGE) if (0) { 1; #endif -#if defined(TFM_HUGE) +#if defined(TFM_SQR32) } else if (y <= 32) { fp_sqr_comba32(A,B); +#endif +#if defined(TFM_SQR48) } else if (y <= 48) { fp_sqr_comba48(A,B); +#endif +#if defined(TFM_SQR64) } else if (y <= 64) { fp_sqr_comba64(A,B); #endif @@ -7761,7 +7792,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B) #endif /* TFM_SMALL_SET */ -#ifdef TFM_HUGE +#ifdef TFM_SQR32 void fp_sqr_comba32(fp_int *A, fp_int *B) { fp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2; @@ -8088,16 +8119,14 @@ void fp_sqr_comba32(fp_int *A, fp_int *B) COMBA_STORE2(b[63]); COMBA_FINI; + memcpy(B->dp, b, 64 * sizeof(fp_digit)); B->used = 64; B->sign = FP_ZPOS; - memcpy(B->dp, b, 64 * sizeof(fp_digit)); fp_clamp(B); } - - #endif -#ifdef TFM_HUGE +#ifdef TFM_SQR64 void fp_sqr_comba64(fp_int *A, fp_int *B) { fp_digit *a, b[128], c0, c1, c2, sc0, sc1, sc2; @@ -8749,7 +8778,9 @@ void fp_sqr_comba64(fp_int *A, fp_int *B) memcpy(B->dp, b, 128 * sizeof(fp_digit)); fp_clamp(B); } +#endif +#ifdef TFM_SQR48 void fp_sqr_comba48(fp_int *A, fp_int *B) { fp_digit *a, b[96], c0, c1, c2, sc0, sc1, sc2; @@ -8801,7 +8832,7 @@ void fp_sqr_comba48(fp_int *A, fp_int *B) /* output 8 */ CARRY_FORWARD; - SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); + SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); COMBA_STORE(b[8]); /* output 9 */ @@ -9236,9 +9267,9 @@ void fp_sqr_comba48(fp_int *A, fp_int *B) COMBA_STORE2(b[95]); COMBA_FINI; + memcpy(B->dp, b, 96 * sizeof(fp_digit)); B->used = 96; B->sign = FP_ZPOS; - memcpy(B->dp, b, 96 * sizeof(fp_digit)); fp_clamp(B); } @@ -9652,11 +9683,11 @@ void s_fp_add(fp_int *a, fp_int *b, fp_int *c) c->dp[x] = (fp_digit)t; t >>= DIGIT_BIT; } - if (t != 0 && x != FP_SIZE) { + if (t != 0 && x < FP_SIZE) { c->dp[c->used++] = (fp_digit)t; ++x; } - + c->used = x; for (; x < oldused; x++) { c->dp[x] = 0; } @@ -9684,18 +9715,23 @@ void s_fp_add(fp_int *a, fp_int *b, fp_int *c) /* unsigned subtraction ||a|| >= ||b|| ALWAYS! */ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c) { - int x, oldused; + int x, oldbused, oldused; fp_word t; - oldused = c->used; - c->used = a->used; + oldused = c->used; + oldbused = b->used; + c->used = a->used; t = 0; - for (x = 0; x < a->used; x++) { - t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t); - c->dp[x] = (fp_digit)t; - t = (t >> DIGIT_BIT) & 1; + for (x = 0; x < oldbused; x++) { + t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t); + c->dp[x] = (fp_digit)t; + t = (t >> DIGIT_BIT)&1; } - + for (; x < a->used; x++) { + t = ((fp_word)a->dp[x]) - t; + c->dp[x] = (fp_digit)t; + t = (t >> DIGIT_BIT); + } for (; x < oldused; x++) { c->dp[x] = 0; } diff --git a/s_fp_add.c b/s_fp_add.c index 3a874f2..7b882e3 100644 --- a/s_fp_add.c +++ b/s_fp_add.c @@ -25,11 +25,11 @@ void s_fp_add(fp_int *a, fp_int *b, fp_int *c) c->dp[x] = (fp_digit)t; t >>= DIGIT_BIT; } - if (t != 0 && x != FP_SIZE) { + if (t != 0 && x < FP_SIZE) { c->dp[c->used++] = (fp_digit)t; ++x; } - + c->used = x; for (; x < oldused; x++) { c->dp[x] = 0; } diff --git a/s_fp_sub.c b/s_fp_sub.c index 41cc6c4..ffabee8 100644 --- a/s_fp_sub.c +++ b/s_fp_sub.c @@ -12,18 +12,23 @@ /* unsigned subtraction ||a|| >= ||b|| ALWAYS! */ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c) { - int x, oldused; + int x, oldbused, oldused; fp_word t; - oldused = c->used; - c->used = a->used; + oldused = c->used; + oldbused = b->used; + c->used = a->used; t = 0; - for (x = 0; x < a->used; x++) { - t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t); - c->dp[x] = (fp_digit)t; - t = (t >> DIGIT_BIT) & 1; + for (x = 0; x < oldbused; x++) { + t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t); + c->dp[x] = (fp_digit)t; + t = (t >> DIGIT_BIT)&1; } - + for (; x < a->used; x++) { + t = ((fp_word)a->dp[x]) - t; + c->dp[x] = (fp_digit)t; + t = (t >> DIGIT_BIT); + } for (; x < oldused; x++) { c->dp[x] = 0; } diff --git a/tfm.dvi b/tfm.dvi index 7766aaa..1b76490 100644 Binary files a/tfm.dvi and b/tfm.dvi differ diff --git a/tfm.h b/tfm.h index da91e81..c17a1d0 100644 --- a/tfm.h +++ b/tfm.h @@ -37,7 +37,12 @@ Enable these if you are doing 32, 48 or 64 digit multiplications (useful for RSA) Less important on 64-bit machines as 32 digits == 2048 bits */ -#define TFM_HUGE +#define TFM_MUL32 +#define TFM_MUL48 +#define TFM_MUL64 +#define TFM_SQR32 +#define TFM_SQR48 +#define TFM_SQR64 /* do we want some overflow checks Not required if you make sure your numbers are within range (e.g. by default a modulus for fp_exptmod() can only be upto 2048 bits long) @@ -61,7 +66,7 @@ * You can externally define this or it defaults to 4096-bits [allowing multiplications upto 2048x2048 bits ] */ #ifndef FP_MAX_SIZE - #define FP_MAX_SIZE (4096+(4*DIGIT_BIT)) + #define FP_MAX_SIZE (4096+(8*DIGIT_BIT)) #endif /* will this lib work? */ diff --git a/tfm.log b/tfm.log index e8dbba0..282bc8b 100644 --- a/tfm.log +++ b/tfm.log @@ -1,4 +1,4 @@ -This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.4.10) 1 AUG 2005 13:34 +This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.9.20) 31 OCT 2005 11:30 entering extended mode **tfm (./tfm.tex @@ -329,4 +329,4 @@ Here is how much of TeX's memory you used: 580 hyphenation exceptions out of 1000 25i,9n,25p,195b,321s stack positions out of 1500i,500n,1500p,200000b,5000s -Output written on tfm.dvi (25 pages, 51612 bytes). +Output written on tfm.dvi (25 pages, 51616 bytes). diff --git a/tfm.tex b/tfm.tex index 761d95f..2985500 100644 --- a/tfm.tex +++ b/tfm.tex @@ -49,7 +49,7 @@ \begin{document} \frontmatter \pagestyle{empty} -\title{TomsFastMath User Manual \\ v0.05} +\title{TomsFastMath User Manual \\ v0.06} \author{Tom St Denis \\ tomstdenis@gmail.com} \maketitle This text and library are all hereby placed in the public domain. This book has been formatted for B5