added tomsfastmath-0.12

This commit is contained in:
Tom St Denis 2007-03-14 17:58:46 +00:00 committed by Steffen Jaeckel
parent ea10e969b2
commit da88c2d42f
109 changed files with 730 additions and 8685 deletions

View File

@ -1,3 +1,14 @@
March 14th, 2007
0.12 -- Christophe Devine contributed MIPS asm w00t
++ quick release to get the MIPS code out there
March 10th, 2007
0.11 -- re-org'ed the source tree, it's cooler now
-- cleaned up mul/sqr so there is only one file per unit, should help building with older compilers and/or on slower boxes
-- [CRI] optimized fp_read_unsigned_bin
-- fixed -0 ... again ... I now have less hair on my head.
-- [CRI] renamed bn_reverse() -> fp_reverse()
November 1st, 2006 November 1st, 2006
0.10 -- Unrolled Montgomery for 1..16 digits with TFM_SMALL_MONT_SET between 10% and 25% speedup depending on size. 0.10 -- Unrolled Montgomery for 1..16 digits with TFM_SMALL_MONT_SET between 10% and 25% speedup depending on size.
-- fixed fp_sqr_comba.c so it builds in ISO C mode [Andreas Lange] -- fixed fp_sqr_comba.c so it builds in ISO C mode [Andreas Lange]

View File

@ -411,6 +411,93 @@ sqrtime:
} }
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2); printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
} }
invmodtime:
   /* Benchmark fp_invmod() for operand sizes of 2, 4, ... digits (below
    * FP_SIZE/2) and print the best observed per-call time for each size.
    *
    * NOTE(review): t, ix, t1, t2, a, b, c and TIMFUNC() are declared outside
    * this section; the "t2 = -1" initialisation presumes t2 is unsigned so
    * that it starts at the maximum value -- confirm against the declarations.
    */
   printf("Invmod:\n");
   for (t = 2; t < FP_SIZE/2; t += 2) {
      /* a: t digits, every digit has its low bit set (so a is odd and its top
       * digit is non-zero, no clamp required) */
      fp_zero(&a);
      for (ix = 0; ix < t; ix++) {
         a.dp[ix] = ix | 1;
      }
      a.used = t;

      /* b: t random digits; clamp in case the top digit(s) came out zero */
      fp_zero(&b);
      for (ix = 0; ix < t; ix++) {
         b.dp[ix] = rand();
      }
      b.used = t;
      fp_clamp(&b);

      fp_zero(&c);
      t2 = -1;
      for (ix = 0; ix < 100; ++ix) {
         t1 = TIMFUNC();
         /* 64 back-to-back calls; the ">>6" below divides by 64 */
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         fp_invmod(&b, &a, &c);
         /* Elapsed time per call goes into t1 (not t2): t2 must keep the
          * running minimum so the comparison below is sample-vs-best.
          * Storing the sample in t2 compared the raw start timestamp with
          * the sample and effectively reported the last measurement. */
         t1 = (TIMFUNC() - t1)>>6;
         /* new best: remember it and grant one extra iteration */
         if (t1<t2) { --ix; t2 = t1; }
      }
      printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
   }
//#else //#else
monttime: monttime:
printf("Montgomery:\n"); printf("Montgomery:\n");
@ -531,10 +618,10 @@ expttime:
} }
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2); printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
} }
return; return 0;
#endif #endif
return; return 0;
testing: testing:
fp_zero(&b); fp_zero(&c); fp_zero(&d); fp_zero(&e); fp_zero(&f); fp_zero(&a); fp_zero(&b); fp_zero(&c); fp_zero(&d); fp_zero(&e); fp_zero(&f); fp_zero(&a);

Binary file not shown.

30
filter.pl Normal file
View File

@ -0,0 +1,30 @@
#!/usr/bin/perl
# Replace everything between the START_INS and END_INS marker lines of one
# file with the contents of another file.
#
# Usage: filter.pl <dst> <ins>
#
#   <dst>  file containing the START_INS / END_INS marker lines
#   <ins>  file whose contents are placed after the START_INS line
#
# The filtered result is written to tmp.delme in the current directory; <dst>
# itself is only read. Both marker lines are kept in the output.
use strict;
use warnings;

my ($dst, $ins) = @ARGV;
die "usage: $0 <dst> <ins>\n" unless defined $dst && defined $ins;

# Fail loudly instead of silently producing an empty or truncated tmp.delme.
open(my $src_fh, "<", $dst)        or die "Couldn't open $dst for reading: $!";
open(my $ins_fh, "<", $ins)        or die "Couldn't open $ins for reading: $!";
open(my $tmp_fh, ">", "tmp.delme") or die "Couldn't open tmp.delme for writing: $!";

my $skipping = 0;   # true while between START_INS and END_INS
my $inserted = 0;   # the insert file is emitted once, at the first START_INS

while (my $line = <$src_fh>) {
   if ($line =~ /START_INS/) {
      print {$tmp_fh} $line;
      $skipping = 1;
      if (!$inserted) {
         while (my $new = <$ins_fh>) {
            print {$tmp_fh} $new;
         }
         close $ins_fh;
         $inserted = 1;
      }
   } elsif ($line =~ /END_INS/) {
      print {$tmp_fh} $line;
      $skipping = 0;
   } elsif (!$skipping) {
      print {$tmp_fh} $line;
   }
}

# close() reports deferred write errors (e.g. disk full) for the output file
close $tmp_fh or die "Couldn't finish writing tmp.delme: $!";
close $src_fh;

View File

@ -1,28 +0,0 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
/* Load c bytes from b, most significant byte first, into a.
 *
 * a is cleared, then for every input byte the value is shifted left by eight
 * bits and the byte is OR-ed into the lowest digit. A count of zero or less
 * reads nothing and leaves a cleared.
 */
void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
{
  int remaining;

  /* start from a cleared integer */
  fp_zero (a);

  /* consume the input one byte at a time */
  for (remaining = c; remaining > 0; --remaining) {
     /* make room for the next byte ... */
     fp_mul_2d (a, 8, a);
     /* ... and drop it into the low bits */
     a->dp[0] |= *b;
     ++b;
     a->used += 1;
  }

  /* strip leading zero digits */
  fp_clamp (a);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

1
gen.pl
View File

@ -7,6 +7,7 @@ use strict;
open( OUT, ">mpi.c" ) or die "Couldn't open mpi.c for writing: $!"; open( OUT, ">mpi.c" ) or die "Couldn't open mpi.c for writing: $!";
foreach my $filename (glob "*fp_*.c") { foreach my $filename (glob "*fp_*.c") {
next if ($filename eq "fp_sqr_comba_generic.c");
open( SRC, "<$filename" ) or die "Couldn't open $filename for reading: $!"; open( SRC, "<$filename" ) or die "Couldn't open $filename for reading: $!";
print OUT "/* Start: $filename */\n"; print OUT "/* Start: $filename */\n";
print OUT while <SRC>; print OUT while <SRC>;

9
genlist.sh Normal file
View File

@ -0,0 +1,9 @@
#!/bin/bash
# Print the OBJECTS= and HEADERS= makefile variable definitions for the files
# found under ./src, formatted by parsenames.pl.
#
# Patterns are quoted (so the shell cannot glob-expand them) and anchored:
#   ^[.]/src  only paths below ./src
#   [.]c$     only names ending in .c (not e.g. .cvsignore or foo.c.orig)
#   [.]h$     only names ending in .h
# Both lists are sorted so the output does not depend on directory order.

# every .c file below src/ (generator sources excluded), renamed to .o
export a=$(find . -type f | sort | grep '^[.]/src' | grep '[.]c$' | grep -v generators | sed -e 's|^\./||' -e 's|\.c$|.o|' | xargs)
perl ./parsenames.pl OBJECTS "$a"

# every .h file below src/
export a=$(find . -type f | sort | grep '^[.]/src' | grep '[.]h$' | sed -e 's|^\./||' | xargs)
perl ./parsenames.pl HEADERS "$a"
# $Source: /cvs/libtom/tomsfastmath/genlist.sh,v $
# $Revision: 1.1 $
# $Date: 2006/12/31 21:31:40 $

View File

@ -1,9 +1,9 @@
#makefile for TomsFastMath #makefile for TomsFastMath
# #
# #
VERSION=0.10 VERSION=0.12
CFLAGS += -Wall -W -Wshadow -I./ CFLAGS += -Wall -W -Wshadow -Isrc/headers
ifndef MAKE ifndef MAKE
MAKE=make MAKE=make
@ -22,35 +22,34 @@ CFLAGS += -fomit-frame-pointer
endif endif
OBJECTS = \ #START_INS
fp_set.o \ OBJECTS=src/addsub/fp_add.o src/addsub/fp_add_d.o src/addsub/fp_addmod.o src/addsub/fp_cmp.o \
\ src/addsub/fp_cmp_d.o src/addsub/fp_cmp_mag.o src/addsub/fp_sub.o src/addsub/fp_sub_d.o \
fp_rshd.o fp_lshd.o fp_div_2d.o fp_mod_2d.o fp_mul_2d.o fp_2expt.o \ src/addsub/fp_submod.o src/addsub/s_fp_add.o src/addsub/s_fp_sub.o src/bin/fp_radix_size.o \
fp_mul_2.o fp_div_2.o \ src/bin/fp_read_radix.o src/bin/fp_read_signed_bin.o src/bin/fp_read_unsigned_bin.o \
\ src/bin/fp_reverse.o src/bin/fp_s_rmap.o src/bin/fp_signed_bin_size.o src/bin/fp_to_signed_bin.o \
fp_cnt_lsb.o \ src/bin/fp_to_unsigned_bin.o src/bin/fp_toradix.o src/bin/fp_unsigned_bin_size.o src/bit/fp_cnt_lsb.o \
\ src/bit/fp_count_bits.o src/bit/fp_div_2.o src/bit/fp_div_2d.o src/bit/fp_lshd.o src/bit/fp_mod_2d.o \
fp_add.o fp_sub.o fp_mul.o fp_sqr.o fp_div.o fp_mod.o \ src/bit/fp_rshd.o src/divide/fp_div.o src/divide/fp_div_d.o src/divide/fp_mod.o src/divide/fp_mod_d.o \
s_fp_add.o s_fp_sub.o \ src/exptmod/fp_2expt.o src/exptmod/fp_exptmod.o src/misc/fp_ident.o src/misc/fp_set.o \
\ src/mont/fp_montgomery_calc_normalization.o src/mont/fp_montgomery_reduce.o \
fp_cmp_d.o fp_add_d.o fp_sub_d.o fp_mul_d.o fp_div_d.o fp_mod_d.o \ src/mont/fp_montgomery_setup.o src/mul/fp_mul.o src/mul/fp_mul_2.o src/mul/fp_mul_2d.o \
fp_addmod.o fp_submod.o fp_mulmod.o fp_sqrmod.o fp_invmod.o \ src/mul/fp_mul_comba.o src/mul/fp_mul_comba_12.o src/mul/fp_mul_comba_17.o src/mul/fp_mul_comba_20.o \
fp_gcd.o fp_lcm.o fp_prime_miller_rabin.o fp_isprime.o \ src/mul/fp_mul_comba_24.o src/mul/fp_mul_comba_28.o src/mul/fp_mul_comba_3.o src/mul/fp_mul_comba_32.o \
fp_prime_random_ex.o fp_mul_comba.o fp_sqr_comba.o \ src/mul/fp_mul_comba_4.o src/mul/fp_mul_comba_48.o src/mul/fp_mul_comba_6.o src/mul/fp_mul_comba_64.o \
\ src/mul/fp_mul_comba_7.o src/mul/fp_mul_comba_8.o src/mul/fp_mul_comba_9.o \
fp_montgomery_setup.o fp_montgomery_calc_normalization.o fp_montgomery_reduce.o \ src/mul/fp_mul_comba_small_set.o src/mul/fp_mul_d.o src/mul/fp_mulmod.o src/numtheory/fp_gcd.o \
\ src/numtheory/fp_invmod.o src/numtheory/fp_isprime.o src/numtheory/fp_lcm.o \
fp_exptmod.o \ src/numtheory/fp_prime_miller_rabin.o src/numtheory/fp_prime_random_ex.o src/sqr/fp_sqr.o \
\ src/sqr/fp_sqr_comba.o src/sqr/fp_sqr_comba_12.o src/sqr/fp_sqr_comba_17.o src/sqr/fp_sqr_comba_20.o \
fp_cmp.o fp_cmp_mag.o \ src/sqr/fp_sqr_comba_24.o src/sqr/fp_sqr_comba_28.o src/sqr/fp_sqr_comba_3.o src/sqr/fp_sqr_comba_32.o \
\ src/sqr/fp_sqr_comba_4.o src/sqr/fp_sqr_comba_48.o src/sqr/fp_sqr_comba_6.o src/sqr/fp_sqr_comba_64.o \
fp_unsigned_bin_size.o fp_read_unsigned_bin.o fp_to_unsigned_bin.o \ src/sqr/fp_sqr_comba_7.o src/sqr/fp_sqr_comba_8.o src/sqr/fp_sqr_comba_9.o \
fp_signed_bin_size.o fp_read_signed_bin.o fp_to_signed_bin.o \ src/sqr/fp_sqr_comba_generic.o src/sqr/fp_sqr_comba_small_set.o src/sqr/fp_sqrmod.o
fp_read_radix.o fp_toradix.o fp_radix_size.o fp_count_bits.o fp_reverse.o fp_s_rmap.o \
\
fp_ident.o
HEADERS=tfm.h HEADERS=src/headers/tfm.h
#END_INS
ifndef LIBPATH ifndef LIBPATH
LIBPATH=/usr/lib LIBPATH=/usr/lib
@ -96,6 +95,14 @@ test: $(LIBNAME) demo/test.o mtest/mtest
timing: $(LIBNAME) demo/test.o timing: $(LIBNAME) demo/test.o
$(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test $(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test
profiled:
CFLAGS="${CFLAGS} -fprofile-generate" MAKE=${MAKE} ${MAKE} timing
./test
rm -f `find . -type f | grep "[.]o" | xargs`
rm -f `find . -type f | grep "[.]a" | xargs`
rm -f test
CFLAGS="${CFLAGS} -fprofile-use" MAKE=${MAKE} ${MAKE} timing
stest: $(LIBNAME) demo/stest.o stest: $(LIBNAME) demo/stest.o
$(CC) $(CFLAGS) demo/stest.o $(LIBNAME) -o stest $(CC) $(CFLAGS) demo/stest.o $(LIBNAME) -o stest
@ -148,5 +155,5 @@ zipup: no_oops docs clean
mv -f tfm* ~ ; rm -rf tomsfastmath-$(VERSION) mv -f tfm* ~ ; rm -rf tomsfastmath-$(VERSION)
# $Source: /cvs/libtom/tomsfastmath/makefile,v $ # $Source: /cvs/libtom/tomsfastmath/makefile,v $
# $Revision: 1.29 $ # $Revision: 1.38 $
# $Date: 2006/10/22 13:27:10 $ # $Date: 2007/03/13 01:23:03 $

View File

@ -1,11 +1,11 @@
#makefile for TomsFastMath #makefile for TomsFastMath
# #
# #
VERSION=0:10 VERSION=0:12
CC=libtool --mode=compile --tag=CC gcc CC=libtool --mode=compile --tag=CC gcc
CFLAGS += -Wall -W -Wshadow -I./ CFLAGS += -Wall -W -Wshadow -Isrc/headers
ifndef IGNORE_SPEED ifndef IGNORE_SPEED
@ -20,36 +20,35 @@ CFLAGS += -fomit-frame-pointer
endif endif
#START_INS
OBJECTS=src/addsub/fp_add.o src/addsub/fp_add_d.o src/addsub/fp_addmod.o src/addsub/fp_cmp.o \
src/addsub/fp_cmp_d.o src/addsub/fp_cmp_mag.o src/addsub/fp_sub.o src/addsub/fp_sub_d.o \
src/addsub/fp_submod.o src/addsub/s_fp_add.o src/addsub/s_fp_sub.o src/bin/fp_radix_size.o \
src/bin/fp_read_radix.o src/bin/fp_read_signed_bin.o src/bin/fp_read_unsigned_bin.o \
src/bin/fp_reverse.o src/bin/fp_s_rmap.o src/bin/fp_signed_bin_size.o src/bin/fp_to_signed_bin.o \
src/bin/fp_to_unsigned_bin.o src/bin/fp_toradix.o src/bin/fp_unsigned_bin_size.o src/bit/fp_cnt_lsb.o \
src/bit/fp_count_bits.o src/bit/fp_div_2.o src/bit/fp_div_2d.o src/bit/fp_lshd.o src/bit/fp_mod_2d.o \
src/bit/fp_rshd.o src/divide/fp_div.o src/divide/fp_div_d.o src/divide/fp_mod.o src/divide/fp_mod_d.o \
src/exptmod/fp_2expt.o src/exptmod/fp_exptmod.o src/misc/fp_ident.o src/misc/fp_set.o \
src/mont/fp_montgomery_calc_normalization.o src/mont/fp_montgomery_reduce.o \
src/mont/fp_montgomery_setup.o src/mul/fp_mul.o src/mul/fp_mul_2.o src/mul/fp_mul_2d.o \
src/mul/fp_mul_comba.o src/mul/fp_mul_comba_12.o src/mul/fp_mul_comba_17.o src/mul/fp_mul_comba_20.o \
src/mul/fp_mul_comba_24.o src/mul/fp_mul_comba_28.o src/mul/fp_mul_comba_3.o src/mul/fp_mul_comba_32.o \
src/mul/fp_mul_comba_4.o src/mul/fp_mul_comba_48.o src/mul/fp_mul_comba_6.o src/mul/fp_mul_comba_64.o \
src/mul/fp_mul_comba_7.o src/mul/fp_mul_comba_8.o src/mul/fp_mul_comba_9.o \
src/mul/fp_mul_comba_small_set.o src/mul/fp_mul_d.o src/mul/fp_mulmod.o src/numtheory/fp_gcd.o \
src/numtheory/fp_invmod.o src/numtheory/fp_isprime.o src/numtheory/fp_lcm.o \
src/numtheory/fp_prime_miller_rabin.o src/numtheory/fp_prime_random_ex.o src/sqr/fp_sqr.o \
src/sqr/fp_sqr_comba.o src/sqr/fp_sqr_comba_12.o src/sqr/fp_sqr_comba_17.o src/sqr/fp_sqr_comba_20.o \
src/sqr/fp_sqr_comba_24.o src/sqr/fp_sqr_comba_28.o src/sqr/fp_sqr_comba_3.o src/sqr/fp_sqr_comba_32.o \
src/sqr/fp_sqr_comba_4.o src/sqr/fp_sqr_comba_48.o src/sqr/fp_sqr_comba_6.o src/sqr/fp_sqr_comba_64.o \
src/sqr/fp_sqr_comba_7.o src/sqr/fp_sqr_comba_8.o src/sqr/fp_sqr_comba_9.o \
src/sqr/fp_sqr_comba_generic.o src/sqr/fp_sqr_comba_small_set.o src/sqr/fp_sqrmod.o
OBJECTS = \ HEADERS=src/headers/tfm.h
fp_set.o \
\ #END_INS
fp_rshd.o fp_lshd.o fp_div_2d.o fp_mod_2d.o fp_mul_2d.o fp_2expt.o \
fp_mul_2.o fp_div_2.o \
\
fp_cnt_lsb.o \
\
fp_add.o fp_sub.o fp_mul.o fp_sqr.o fp_div.o fp_mod.o \
s_fp_add.o s_fp_sub.o \
\
fp_cmp_d.o fp_add_d.o fp_sub_d.o fp_mul_d.o fp_div_d.o fp_mod_d.o \
fp_addmod.o fp_submod.o fp_mulmod.o fp_sqrmod.o fp_invmod.o \
fp_gcd.o fp_lcm.o fp_prime_miller_rabin.o fp_isprime.o \
fp_prime_random_ex.o fp_mul_comba.o fp_sqr_comba.o \
\
fp_montgomery_setup.o fp_montgomery_calc_normalization.o fp_montgomery_reduce.o \
\
fp_exptmod.o \
\
fp_cmp.o fp_cmp_mag.o \
\
fp_unsigned_bin_size.o fp_read_unsigned_bin.o fp_to_unsigned_bin.o \
fp_signed_bin_size.o fp_read_signed_bin.o fp_to_signed_bin.o \
fp_read_radix.o fp_toradix.o fp_radix_size.o fp_count_bits.o fp_reverse.o fp_s_rmap.o \
\
fp_ident.o
HEADERS=tfm.h
ifndef LIBPATH ifndef LIBPATH
LIBPATH=/usr/lib LIBPATH=/usr/lib
@ -105,6 +104,6 @@ stest: $(LIBNAME) demo/stest.o
$(CC) $(CFLAGS) demo/stest.o $(LIBNAME_S) -o stest $(CC) $(CFLAGS) demo/stest.o $(LIBNAME_S) -o stest
# $Source: /cvs/libtom/tomsfastmath/makefile.shared,v $ # $Source: /cvs/libtom/tomsfastmath/makefile.shared,v $
# $Revision: 1.14 $ # $Revision: 1.19 $
# $Date: 2006/10/22 13:27:10 $ # $Date: 2007/03/13 01:23:03 $

View File

@ -39,7 +39,7 @@ mulmod
#include <time.h> #include <time.h>
#include <tommath.h> #include <tommath.h>
#define CRYPT #define CRYPT
#include "../tfm.h" #include "../src/headers/tfm.h"
FILE *rng; FILE *rng;

26
parsenames.pl Normal file
View File

@ -0,0 +1,26 @@
#!/usr/bin/perl
#
# Prints a makefile-style variable definition, "NAME=item item ...", followed
# by an empty line.
#
# Usage: parsenames.pl NAME "item item ..."
#
# The second argument is split on whitespace. A running length is kept (the
# "NAME=" prefix plus the item lengths, separators not counted); once it
# exceeds 100 a backslash-newline continuation is emitted before the item and
# the count restarts at that item's length.
#
# Tom St Denis
my ($name, $list) = @ARGV[0, 1];
my @items  = split(" ", $list);
my $prefix = "$name=";
my $column = length($prefix);

print $prefix;
for my $item (@items) {
   $column += length($item);
   # the first '*' in an item is written out as '$'
   $item =~ s/\*/\$/;
   if ($column > 100) {
      print "\\\n";
      $column = length($item);
   }
   print "$item ";
}
print "\n\n";
# $Source: /cvs/libtom/tomsfastmath/parsenames.pl,v $
# $Revision: 1.1 $
# $Date: 2006/12/31 21:31:40 $

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,66 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
/* Load an unsigned, most-significant-byte-first byte string into an fp_int.
 *
 * a  destination; cleared first, then filled and clamped
 * b  source bytes, b[0] being the most significant one
 * c  number of bytes available at b; a value <= 0 reads nothing and leaves
 *    a cleared
 *
 * When the byte order is known (ENDIAN_LITTLE or ENDIAN_BIG) and FP_64BIT is
 * not defined, the bytes are stored straight into the digit array. In that
 * path an input longer than the digit array (FP_SIZE * sizeof(fp_digit)
 * bytes) loses its leading, most significant, bytes. Otherwise the value is
 * built with one shift-and-OR per byte.
 */
void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
{
  /* zero the int */
  fp_zero (a);

  /* Nothing to read. Without this guard c == 0 reaches the big-endian
     unrolled loop with idx == -4 and writes in front of a->dp, and a negative
     c passes the (unsigned) size check below and moves b by a bogus amount. */
  if (c <= 0) {
    fp_clamp (a);
    return;
  }

  /* If we know the endianness of this architecture, and we're using
     32-bit fp_digits, we can optimize this */
#if (defined(ENDIAN_LITTLE) || defined(ENDIAN_BIG)) && !defined(FP_64BIT)
  /* But not for both simultaneously */
#if defined(ENDIAN_LITTLE) && defined(ENDIAN_BIG)
#error Both ENDIAN_LITTLE and ENDIAN_BIG defined.
#endif
  {
     /* byte view of the digit array */
     unsigned char *pd = (unsigned char *)a->dp;

     /* too many bytes: skip the leading (most significant) excess */
     if ((unsigned)c > (FP_SIZE * sizeof(fp_digit))) {
        int excess = c - (int)(FP_SIZE * sizeof(fp_digit));
        c -= excess;
        b += excess;
     }
     /* number of digits touched, rounded up */
     a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
     /* read the bytes in */
#ifdef ENDIAN_BIG
     {
       /* Use Duff's device to unroll the loop. */
       /* idx is the byte offset of the top digit; the literal 3 and 4
          presume sizeof(fp_digit) == 4 -- TODO confirm for every
          non-FP_64BIT configuration */
       int idx = (c - 1) & ~3;
       switch (c % 4) {
       case 0:    do { pd[idx+0] = *b++;   /* fallthrough */
       case 3:         pd[idx+1] = *b++;   /* fallthrough */
       case 2:         pd[idx+2] = *b++;   /* fallthrough */
       case 1:         pd[idx+3] = *b++;
                     idx -= 4;
                        } while ((c -= 4) > 0);
       }
     }
#else
     /* little endian: the last input byte lands at byte offset 0 */
     for (c -= 1; c >= 0; c -= 1) {
       pd[c] = *b++;
     }
#endif
  }
#else
  /* read the bytes in */
  for (; c > 0; c--) {
     fp_mul_2d (a, 8, a);
     a->dp[0] |= *b++;
     a->used += 1;
  }
#endif
  fp_clamp (a);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -10,7 +10,7 @@
#include <tfm.h> #include <tfm.h>
/* reverse an array, used for radix code */ /* reverse an array, used for radix code */
void bn_reverse (unsigned char *s, int len) void fp_reverse (unsigned char *s, int len)
{ {
int ix, iy; int ix, iy;
unsigned char t; unsigned char t;

View File

@ -21,7 +21,7 @@ void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
b[x++] = (unsigned char) (t.dp[0] & 255); b[x++] = (unsigned char) (t.dp[0] & 255);
fp_div_2d (&t, 8, &t, NULL); fp_div_2d (&t, 8, &t, NULL);
} }
bn_reverse (b, x); fp_reverse (b, x);
} }
/* $Source$ */ /* $Source$ */

View File

@ -47,7 +47,7 @@ int fp_toradix(fp_int *a, char *str, int radix)
/* reverse the digits of the string. In this case _s points /* reverse the digits of the string. In this case _s points
* to the first digit [exluding the sign] of the number] * to the first digit [exluding the sign] of the number]
*/ */
bn_reverse ((unsigned char *)_s, digs); fp_reverse ((unsigned char *)_s, digs);
/* append a NULL so the string is properly terminated */ /* append a NULL so the string is properly terminated */
*str = '\0'; *str = '\0';

View File

@ -13,7 +13,12 @@ static int s_is_power_of_two(fp_digit b, int *p)
{ {
int x; int x;
for (x = 1; x < DIGIT_BIT; x++) { /* fast return if no power of two */
if ((b==0) || (b & (b-1))) {
return 0;
}
for (x = 0; x < DIGIT_BIT; x++) {
if (b == (((fp_digit)1)<<x)) { if (b == (((fp_digit)1)<<x)) {
*p = x; *p = x;
return 1; return 1;

View File

@ -315,13 +315,13 @@ void fp_set(fp_int *a, fp_digit b);
#define fp_copy(a, b) (void)(((a) != (b)) && memcpy((b), (a), sizeof(fp_int))) #define fp_copy(a, b) (void)(((a) != (b)) && memcpy((b), (a), sizeof(fp_int)))
#define fp_init_copy(a, b) fp_copy(b, a) #define fp_init_copy(a, b) fp_copy(b, a)
/* negate and absolute */
#define fp_neg(a, b) { fp_copy(a, b); (b)->sign ^= 1; }
#define fp_abs(a, b) { fp_copy(a, b); (b)->sign = 0; }
/* clamp digits */ /* clamp digits */
#define fp_clamp(a) { while ((a)->used && (a)->dp[(a)->used-1] == 0) --((a)->used); (a)->sign = (a)->used ? (a)->sign : FP_ZPOS; } #define fp_clamp(a) { while ((a)->used && (a)->dp[(a)->used-1] == 0) --((a)->used); (a)->sign = (a)->used ? (a)->sign : FP_ZPOS; }
/* negate and absolute */
/* fp_neg(a, b): copy a into b, flip b's sign, then clamp b. The clamp resets
 * the sign to FP_ZPOS when no digits are in use, so negating zero cannot
 * leave a negative zero behind.
 * fp_abs(a, b): copy a into b and set b's sign field to 0.
 * Both expand to a brace block and mention b more than once, so pass
 * arguments without side effects. */
#define fp_neg(a, b) { fp_copy(a, b); (b)->sign ^= 1; fp_clamp(b); }
#define fp_abs(a, b) { fp_copy(a, b); (b)->sign = 0; }
/* right shift x digits */ /* right shift x digits */
void fp_rshd(fp_int *a, int x); void fp_rshd(fp_int *a, int x);
@ -457,7 +457,7 @@ int fp_toradix_n(fp_int * a, char *str, int radix, int maxlen);
/* VARIOUS LOW LEVEL STUFFS */ /* VARIOUS LOW LEVEL STUFFS */
void s_fp_add(fp_int *a, fp_int *b, fp_int *c); void s_fp_add(fp_int *a, fp_int *b, fp_int *c);
void s_fp_sub(fp_int *a, fp_int *b, fp_int *c); void s_fp_sub(fp_int *a, fp_int *b, fp_int *c);
void bn_reverse(unsigned char *s, int len); void fp_reverse(unsigned char *s, int len);
void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C); void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C);

View File

@ -311,6 +311,7 @@ asm( \
" MOVCC %0,#0 \n\t" \ " MOVCC %0,#0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc");
/******************************************************************/
#elif defined(TFM_PPC32) #elif defined(TFM_PPC32)
/* PPC32 */ /* PPC32 */
@ -341,6 +342,7 @@ asm( \
" addze %0,%0 \n\t" \ " addze %0,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc"); :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc");
/******************************************************************/
#elif defined(TFM_PPC64) #elif defined(TFM_PPC64)
/* PPC64 */ /* PPC64 */
@ -352,27 +354,26 @@ asm( \
#define INNERMUL \ #define INNERMUL \
asm( \ asm( \
" mulld 16,%3,%4 \n\t" \ " mulld r16,%3,%4 \n\t" \
" mulhdu 17,%3,%4 \n\t" \ " mulhdu r17,%3,%4 \n\t" \
" addc 16,16,%0 \n\t" \ " addc r16,16,%0 \n\t" \
" addze 17,17 \n\t" \ " addze r17,r17 \n\t" \
" ldx 18,0,%1 \n\t" \ " ldx r18,0,%1 \n\t" \
" addc 16,16,18 \n\t" \ " addc r16,r16,r18 \n\t" \
" addze %0,17 \n\t" \ " addze %0,r17 \n\t" \
" sdx 16,0,%1 \n\t" \ " sdx r16,0,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm; :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"r16", "r17", "r18","%cc"); ++tmpm;
#define PROPCARRY \ #define PROPCARRY \
asm( \ asm( \
" ldx 16,0,%1 \n\t" \ " ldx r16,0,%1 \n\t" \
" addc 16,16,%0 \n\t" \ " addc r16,r16,%0 \n\t" \
" sdx 16,0,%1 \n\t" \ " sdx r16,0,%1 \n\t" \
" xor %0,%0,%0 \n\t" \ " xor %0,%0,%0 \n\t" \
" addze %0,%0 \n\t" \ " addze %0,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc"); :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc");
/******************************************************************/ /******************************************************************/
#elif defined(TFM_AVR32) #elif defined(TFM_AVR32)
/* AVR32 */ /* AVR32 */
@ -402,6 +403,40 @@ asm( \
" acr %0 \n\t" \ " acr %0 \n\t" \
:"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","%cc"); :"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","%cc");
/******************************************************************/
#elif defined(TFM_MIPS)
/* MIPS */
/* Montgomery reduction building blocks written as MIPS inline asm.
 * $10, $12 and $13 are used as scratch registers and listed as clobbers.
 * Carries are recovered with sltu: after "sum = x + y", (sum < y) is 1
 * exactly when the addition wrapped.
 * NOTE(review): multu writes HI/LO, which the clobber lists do not name --
 * confirm this is acceptable for the targeted compilers. */

/* no setup, teardown or end-of-loop work on this target */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* multiplier for the current row */
#define LOOP_START \
mu = c[x] * mp
/* INNERMUL: form mu * tmpm[0] + cy + _c[0]; the low word is stored back to
 * _c[0], the high word plus both carries becomes the new cy, and tmpm is
 * advanced to the next digit. */
#define INNERMUL \
asm( \
" multu %3,%4 \n\t" \
" mflo $12 \n\t" \
" mfhi $13 \n\t" \
" addu $12,$12,%0 \n\t" \
" sltu $10,$12,%0 \n\t" \
" addu $13,$13,$10 \n\t" \
" lw $10,%1 \n\t" \
" addu $12,$12,$10 \n\t" \
" sltu $10,$12,$10 \n\t" \
" addu %0,$13,$10 \n\t" \
" sw $12,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"$10","$12","$13"); ++tmpm;
/* PROPCARRY: add cy into _c[0] and leave the carry out of that addition
 * (0 or 1) in cy. */
#define PROPCARRY \
asm( \
" lw $10,%1 \n\t" \
" addu $10,$10,%0 \n\t" \
" sw $10,%1 \n\t" \
" sltu %0,$10,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"$10");
/******************************************************************/
#else #else
/* ISO C code */ /* ISO C code */
@ -428,7 +463,7 @@ asm( \
#define LO 0 #define LO 0
#ifdef TFM_SMALL_MONT_SET #ifdef TFM_SMALL_MONT_SET
#include "fp_mont_small.c" #include "fp_mont_small.i"
#endif #endif
/* computes x/R == x (mod N) via Montgomery Reduction */ /* computes x/R == x (mod N) via Montgomery Reduction */

View File

@ -205,15 +205,15 @@ asm( \
#define COMBA_FINI #define COMBA_FINI
/* untested: will mulhwu change the flags? Docs say no */ /* untested: will mulhdu change the flags? Docs say no */
#define MULADD(i, j) \ #define MULADD(i, j) \
asm( \ asm( \
" mulld 16,%6,%7 \n\t" \ " mulld r16,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \ " addc %0,%0,16 \n\t" \
" mulhdu 16,%6,%7 \n\t" \ " mulhdu r16,%6,%7 \n\t" \
" adde %1,%1,16 \n\t" \ " adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16"); :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16");
#elif defined(TFM_AVR32) #elif defined(TFM_AVR32)
@ -243,6 +243,39 @@ asm( \
" acr %2 \n\t"\ " acr %2 \n\t"\
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3"); :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
#elif defined(TFM_MIPS)
/* Comba multiplication building blocks for MIPS. The running column sum is
 * kept in the three words c0 (low), c1 and c2 (high). */

/* nothing to set up */
#define COMBA_START
/* reset the three-word accumulator */
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
/* move on to the next column: shift the accumulator down by one word */
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
/* store the low accumulator word */
#define COMBA_STORE(x) \
x = c0;
/* store the middle accumulator word */
#define COMBA_STORE2(x) \
x = c1;
/* nothing to tear down */
#define COMBA_FINI
/* MULADD(i, j): add the double-word product i * j into c2:c1:c0.
 * $12/$13 first hold the low/high product words and are then reused for the
 * carries, which sltu derives as (sum < addend). */
#define MULADD(i, j) \
asm( \
" multu %6,%7 \n\t" \
" mflo $12 \n\t" \
" mfhi $13 \n\t" \
" addu %0,%0,$12 \n\t" \
" sltu $12,%0,$12 \n\t" \
" addu %1,%1,$13 \n\t" \
" sltu $13,%1,$13 \n\t" \
" addu %1,%1,$12 \n\t" \
" sltu $12,%1,$12 \n\t" \
" addu %2,%2,$13 \n\t" \
" addu %2,%2,$12 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13");
#else #else
/* ISO C code */ /* ISO C code */
@ -270,6 +303,7 @@ asm( \
#endif #endif
#ifndef TFM_DEFINES
/* generic PxQ multiplier */ /* generic PxQ multiplier */
void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
@ -326,21 +360,7 @@ void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
fp_copy(dst, C); fp_copy(dst, C);
} }
#include "fp_mul_comba_small_set.i" #endif
#include "fp_mul_comba_3.i"
#include "fp_mul_comba_4.i"
#include "fp_mul_comba_6.i"
#include "fp_mul_comba_7.i"
#include "fp_mul_comba_8.i"
#include "fp_mul_comba_9.i"
#include "fp_mul_comba_12.i"
#include "fp_mul_comba_17.i"
#include "fp_mul_comba_20.i"
#include "fp_mul_comba_24.i"
#include "fp_mul_comba_28.i"
#include "fp_mul_comba_32.i"
#include "fp_mul_comba_48.i"
#include "fp_mul_comba_64.i"
/* $Source$ */ /* $Source$ */
/* $Revision$ */ /* $Revision$ */

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL12 #ifdef TFM_MUL12
void fp_mul_comba12(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba12(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL17 #ifdef TFM_MUL17
void fp_mul_comba17(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba17(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL20 #ifdef TFM_MUL20
void fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL24 #ifdef TFM_MUL24
void fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL28 #ifdef TFM_MUL28
void fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL3 #ifdef TFM_MUL3
void fp_mul_comba3(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba3(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL32 #ifdef TFM_MUL32
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL4 #ifdef TFM_MUL4
void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL48 #ifdef TFM_MUL48
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL6 #ifdef TFM_MUL6
void fp_mul_comba6(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba6(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL64 #ifdef TFM_MUL64
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL7 #ifdef TFM_MUL7
void fp_mul_comba7(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba7(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL8 #ifdef TFM_MUL8
void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL9 #ifdef TFM_MUL9
void fp_mul_comba9(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba9(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -1,3 +1,6 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#if defined(TFM_SMALL_SET) #if defined(TFM_SMALL_SET)
void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
{ {

View File

@ -63,6 +63,7 @@ int fp_isprime(fp_int *a)
} }
/* now do 8 miller rabins */ /* now do 8 miller rabins */
fp_init(&b);
for (r = 0; r < 8; r++) { for (r = 0; r < 8; r++) {
fp_set(&b, primes[r]); fp_set(&b, primes[r]);
fp_prime_miller_rabin(a, &b, &res); fp_prime_miller_rabin(a, &b, &res);

View File

@ -14,7 +14,6 @@
#define TFM_X86 #define TFM_X86
#endif #endif
#if defined(TFM_X86) #if defined(TFM_X86)
/* x86-32 optimized */ /* x86-32 optimized */
@ -390,25 +389,25 @@ asm( \
/* multiplies point i and j, updates carry "c1" and digit c2 */ /* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \ #define SQRADD(i, j) \
asm( \ asm( \
" mulld 16,%6,%6 \n\t" \ " mulld r16,%6,%6 \n\t" \
" addc %0,%0,16 \n\t" \ " addc %0,%0,r16 \n\t" \
" mulhdu 16,%6,%6 \n\t" \ " mulhdu r16,%6,%6 \n\t" \
" adde %1,%1,16 \n\t" \ " adde %1,%1,r16 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc"); :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","%cc");
/* for squaring some of the terms are doubled... */ /* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \ #define SQRADD2(i, j) \
asm( \ asm( \
" mulld 16,%6,%7 \n\t" \ " mulld r16,%6,%7 \n\t" \
" mulhdu 17,%6,%7 \n\t" \ " mulhdu r17,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \ " addc %0,%0,r16 \n\t" \
" adde %1,%1,17 \n\t" \ " adde %1,%1,r17 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
" addc %0,%0,16 \n\t" \ " addc %0,%0,r16 \n\t" \
" adde %1,%1,17 \n\t" \ " adde %1,%1,r17 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc"); :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","%cc");
#define SQRADDSC(i, j) \ #define SQRADDSC(i, j) \
asm( \ asm( \
@ -419,12 +418,12 @@ asm( \
#define SQRADDAC(i, j) \ #define SQRADDAC(i, j) \
asm( \ asm( \
" mulld 16,%6,%7 \n\t" \ " mulld r16,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \ " addc %0,%0,r16 \n\t" \
" mulhdu 16,%6,%7 \n\t" \ " mulhdu r16,%6,%7 \n\t" \
" adde %1,%1,16 \n\t" \ " adde %1,%1,r16 \n\t" \
" addze %2,%2 \n\t" \ " addze %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc"); :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc");
#define SQRADDDB \ #define SQRADDDB \
asm( \ asm( \
@ -504,6 +503,113 @@ asm( \
" adc %2,%2,%5 \n\t" \ " adc %2,%2,%5 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
#elif defined(TFM_MIPS)
/* MIPS */
#define COMBA_START
#define CLEAR_CARRY \
c0 = c1 = c2 = 0;
#define COMBA_STORE(x) \
x = c0;
#define COMBA_STORE2(x) \
x = c1;
#define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI
/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
asm( \
" multu %6,%6 \n\t" \
" mflo $12 \n\t" \
" mfhi $13 \n\t" \
" addu %0,%0,$12 \n\t" \
" sltu $12,%0,$12 \n\t" \
" addu %1,%1,$13 \n\t" \
" sltu $13,%1,$13 \n\t" \
" addu %1,%1,$12 \n\t" \
" sltu $12,%1,$12 \n\t" \
" addu %2,%2,$13 \n\t" \
" addu %2,%2,$12 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13");
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
asm( \
" multu %6,%7 \n\t" \
" mflo $12 \n\t" \
" mfhi $13 \n\t" \
\
" addu %0,%0,$12 \n\t" \
" sltu $14,%0,$12 \n\t" \
" addu %1,%1,$13 \n\t" \
" sltu $15,%1,$13 \n\t" \
" addu %1,%1,$14 \n\t" \
" sltu $14,%1,$14 \n\t" \
" addu %2,%2,$15 \n\t" \
" addu %2,%2,$14 \n\t" \
\
" addu %0,%0,$12 \n\t" \
" sltu $14,%0,$12 \n\t" \
" addu %1,%1,$13 \n\t" \
" sltu $15,%1,$13 \n\t" \
" addu %1,%1,$14 \n\t" \
" sltu $14,%1,$14 \n\t" \
" addu %2,%2,$15 \n\t" \
" addu %2,%2,$14 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15");
/* SQRADDSC(i, j): start a new secondary accumulator with sc2:sc1:sc0 = i * j.
 *
 * sc0 receives the low product word, sc1 the high word and sc2 is zeroed.
 * All three are pure outputs: their previous contents are never used, so
 * they are not passed in as inputs (doing so would make the compiler read
 * them while they may still be uninitialised).  Both inputs are consumed by
 * the first instruction, before any output register is written.
 *
 * Clobbers: hi/lo (overwritten by multu, so the compiler must not assume
 * they survive the statement).
 */
#define SQRADDSC(i, j)                                                   \
asm(                                                                     \
" multu %3,%4 \n\t"       /* HI:LO = i * j                      */       \
" mflo %0 \n\t"           /* sc0 = low word                     */       \
" mfhi %1 \n\t"           /* sc1 = high word                    */       \
" move %2,$0 \n\t"        /* sc2 = 0                            */       \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"r"(i), "r"(j):"hi", "lo");
/* SQRADDAC(i, j): add the product i * j into the secondary accumulator
 * sc2:sc1:sc0.
 *
 * The double-width product is fetched from HI:LO into $12 (low) and $13
 * (high).  After each addu, sltu rebuilds the carry: an unsigned sum
 * smaller than the value just added means the addition wrapped.
 *
 * Clobbers: $12, $13 (the only scratch registers written) and hi/lo
 * (overwritten by multu, so the compiler must not assume they survive the
 * statement).
 */
#define SQRADDAC(i, j)                                                   \
asm(                                                                     \
" multu %6,%7 \n\t"       /* HI:LO = i * j                      */       \
" mflo $12 \n\t"          /* $12 = low word of the product      */       \
" mfhi $13 \n\t"          /* $13 = high word of the product     */       \
" addu %0,%0,$12 \n\t"    /* sc0 += low word                    */       \
" sltu $12,%0,$12 \n\t"   /* $12 = carry out of sc0             */       \
" addu %1,%1,$13 \n\t"    /* sc1 += high word                   */       \
" sltu $13,%1,$13 \n\t"   /* $13 = carry out of that add        */       \
" addu %1,%1,$12 \n\t"    /* sc1 += carry from sc0              */       \
" sltu $12,%1,$12 \n\t"   /* $12 = carry out of that add        */       \
" addu %2,%2,$13 \n\t"    /* sc2 += both carries out of sc1     */       \
" addu %2,%2,$12 \n\t"                                                   \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "hi", "lo");
/* SQRADDDB: add the secondary accumulator sc2:sc1:sc0 into the main
 * accumulator c2:c1:c0 twice, i.e. accumulate double its value.
 *
 * The macro takes no arguments; it reads sc0/sc1/sc2 and updates c0/c1/c2
 * in place.  The same nine-instruction sequence is emitted two times.  In
 * each pass:
 *   - c0 += sc0, and $10 becomes the carry out of that add;
 *   - that carry is added to c1, and $10 becomes the carry out of it;
 *   - c1 += sc1, and $11 becomes the carry out of that add;
 *   - c2 receives both carries plus sc2.
 * Carries are rebuilt with sltu: an unsigned sum smaller than the value
 * just added means the addu wrapped.
 *
 * Clobbers: $10 and $11 (scratch carry registers).  No multiply is issued,
 * so HI/LO are not touched.
 */
#define SQRADDDB \
asm( \
" addu %0,%0,%3 \n\t" \
" sltu $10,%0,%3 \n\t" \
" addu %1,%1,$10 \n\t" \
" sltu $10,%1,$10 \n\t" \
" addu %1,%1,%4 \n\t" \
" sltu $11,%1,%4 \n\t" \
" addu %2,%2,$10 \n\t" \
" addu %2,%2,$11 \n\t" \
" addu %2,%2,%5 \n\t" \
\
" addu %0,%0,%3 \n\t" \
" sltu $10,%0,%3 \n\t" \
" addu %1,%1,$10 \n\t" \
" sltu $10,%1,$10 \n\t" \
" addu %1,%1,%4 \n\t" \
" sltu $11,%1,%4 \n\t" \
" addu %2,%2,$10 \n\t" \
" addu %2,%2,$11 \n\t" \
" addu %2,%2,%5 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11");
#else #else
@ -566,19 +672,6 @@ asm( \
#endif #endif
#include "fp_sqr_comba_generic.c" /* $Source$ */
#include "fp_sqr_comba_small_set.i" /* $Revision$ */
#include "fp_sqr_comba_3.i" /* $Date$ */
#include "fp_sqr_comba_4.i"
#include "fp_sqr_comba_6.i"
#include "fp_sqr_comba_7.i"
#include "fp_sqr_comba_8.i"
#include "fp_sqr_comba_9.i"
#include "fp_sqr_comba_12.i"
#include "fp_sqr_comba_17.i"
#include "fp_sqr_comba_20.i"
#include "fp_sqr_comba_24.i"
#include "fp_sqr_comba_28.i"
#include "fp_sqr_comba_32.i"
#include "fp_sqr_comba_48.i"
#include "fp_sqr_comba_64.i"

View File

@ -1,7 +1,13 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR12 #ifdef TFM_SQR12
void fp_sqr_comba12(fp_int *A, fp_int *B) void fp_sqr_comba12(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[24], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[24], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp; a = A->dp;
COMBA_START; COMBA_START;
@ -133,3 +139,6 @@ void fp_sqr_comba12(fp_int *A, fp_int *B)
#endif #endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,13 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR17 #ifdef TFM_SQR17
void fp_sqr_comba17(fp_int *A, fp_int *B) void fp_sqr_comba17(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[34], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[34], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp; a = A->dp;
COMBA_START; COMBA_START;
@ -183,3 +189,6 @@ void fp_sqr_comba17(fp_int *A, fp_int *B)
#endif #endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,13 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR20 #ifdef TFM_SQR20
void fp_sqr_comba20(fp_int *A, fp_int *B) void fp_sqr_comba20(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[40], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[40], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp; a = A->dp;
COMBA_START; COMBA_START;
@ -213,3 +219,6 @@ void fp_sqr_comba20(fp_int *A, fp_int *B)
#endif #endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,13 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR24 #ifdef TFM_SQR24
void fp_sqr_comba24(fp_int *A, fp_int *B) void fp_sqr_comba24(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[48], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[48], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp; a = A->dp;
COMBA_START; COMBA_START;
@ -253,3 +259,6 @@ void fp_sqr_comba24(fp_int *A, fp_int *B)
#endif #endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,13 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR28 #ifdef TFM_SQR28
void fp_sqr_comba28(fp_int *A, fp_int *B) void fp_sqr_comba28(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[56], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[56], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp; a = A->dp;
COMBA_START; COMBA_START;
@ -293,3 +299,6 @@ void fp_sqr_comba28(fp_int *A, fp_int *B)
#endif #endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,13 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR3 #ifdef TFM_SQR3
void fp_sqr_comba3(fp_int *A, fp_int *B) void fp_sqr_comba3(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[6], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[6], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp; a = A->dp;
COMBA_START; COMBA_START;
@ -43,3 +49,6 @@ void fp_sqr_comba3(fp_int *A, fp_int *B)
#endif #endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,13 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR32 #ifdef TFM_SQR32
void fp_sqr_comba32(fp_int *A, fp_int *B) void fp_sqr_comba32(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp; a = A->dp;
COMBA_START; COMBA_START;
@ -333,3 +339,6 @@ void fp_sqr_comba32(fp_int *A, fp_int *B)
#endif #endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,13 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR4 #ifdef TFM_SQR4
void fp_sqr_comba4(fp_int *A, fp_int *B) void fp_sqr_comba4(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[8], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[8], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp; a = A->dp;
COMBA_START; COMBA_START;
@ -53,3 +59,6 @@ void fp_sqr_comba4(fp_int *A, fp_int *B)
#endif #endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,13 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR48 #ifdef TFM_SQR48
void fp_sqr_comba48(fp_int *A, fp_int *B) void fp_sqr_comba48(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[96], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[96], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp; a = A->dp;
COMBA_START; COMBA_START;
@ -493,3 +499,6 @@ void fp_sqr_comba48(fp_int *A, fp_int *B)
#endif #endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,13 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR6 #ifdef TFM_SQR6
void fp_sqr_comba6(fp_int *A, fp_int *B) void fp_sqr_comba6(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[12], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[12], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp; a = A->dp;
COMBA_START; COMBA_START;
@ -73,3 +79,6 @@ void fp_sqr_comba6(fp_int *A, fp_int *B)
#endif #endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,13 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR64 #ifdef TFM_SQR64
void fp_sqr_comba64(fp_int *A, fp_int *B) void fp_sqr_comba64(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[128], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[128], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp; a = A->dp;
COMBA_START; COMBA_START;
@ -653,3 +659,6 @@ void fp_sqr_comba64(fp_int *A, fp_int *B)
#endif #endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,13 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR7 #ifdef TFM_SQR7
void fp_sqr_comba7(fp_int *A, fp_int *B) void fp_sqr_comba7(fp_int *A, fp_int *B)
{ {
fp_digit *a, b[14], c0, c1, c2, sc0, sc1, sc2; fp_digit *a, b[14], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp; a = A->dp;
COMBA_START; COMBA_START;
@ -83,3 +89,6 @@ void fp_sqr_comba7(fp_int *A, fp_int *B)
#endif #endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

Some files were not shown because too many files have changed in this diff Show More