From b5e034662234441931a9283eefaf2b66d43e8a31 Mon Sep 17 00:00:00 2001 From: Johnny Willemsen Date: Thu, 5 Feb 2015 12:09:04 +0100 Subject: [PATCH 1/9] Added GCC 4.9 configuration --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7539be2..0b7cc91 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,12 +7,14 @@ env: - MYCC="gcc -m32" - MYCC="gcc-4.8" - MYCC="gcc-4.8 -m32" + - MYCC="gcc-4.9" + - MYCC="gcc-4.9 -m32" matrix: fast_finish: true before_script: - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test - sudo apt-get -qq update - - sudo apt-get install gcc-4.8-multilib gcc-multilib build-essential + - sudo apt-get install gcc-4.9-multilib gcc-4.8-multilib gcc-multilib build-essential after_failure: - cat test_gcc_1.txt - cat test_std.txt From 5f37f875b2c8cb17200030af46005e08da5211bb Mon Sep 17 00:00:00 2001 From: Johnny Willemsen Date: Thu, 5 Feb 2015 12:11:49 +0100 Subject: [PATCH 2/9] Attempt to also build shared using travis-ci --- .travis.yml | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0b7cc91..95a452c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,14 +1,20 @@ language: c compiler: - gcc -script: CC="${MYCC}" make test >test_gcc_1.txt 2>test_gcc_2.txt && ./test >test_std.txt 2>test_err.txt +script: CC="${MYCC}" make ${SHARED} test >test_gcc_1.txt 2>test_gcc_2.txt && ./test >test_std.txt 2>test_err.txt env: - - MYCC="gcc" - - MYCC="gcc -m32" - - MYCC="gcc-4.8" - - MYCC="gcc-4.8 -m32" - - MYCC="gcc-4.9" - - MYCC="gcc-4.9 -m32" + - MYCC="gcc" SHARED="" + - MYCC="gcc -m32" SHARED="" + - MYCC="gcc-4.8" SHARED="" + - MYCC="gcc-4.8 -m32" SHARED="" + - MYCC="gcc-4.9" SHARED="" + - MYCC="gcc-4.9 -m32" SHARED="" + - MYCC="gcc" SHARED="-f makefile.shared" + - MYCC="gcc -m32" SHARED="-f makefile.shared" + - MYCC="gcc-4.8" SHARED="-f makefile.shared" + - MYCC="gcc-4.8 -m32" SHARED="-f makefile.shared" + - MYCC="gcc-4.9" 
SHARED="-f makefile.shared" + - MYCC="gcc-4.9 -m32" SHARED="-f makefile.shared" matrix: fast_finish: true before_script: From 6231d132dedbda1c0c1db209ff9189d87e0426c7 Mon Sep 17 00:00:00 2001 From: Steffen Jaeckel Date: Fri, 6 Feb 2015 14:44:33 +0100 Subject: [PATCH 3/9] makefile.shared: fix linker step --- makefile.shared | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile.shared b/makefile.shared index dc118d7..90d7f91 100644 --- a/makefile.shared +++ b/makefile.shared @@ -87,7 +87,7 @@ $(OBJECTS): $(HEADERS) $(LTCOMPILE) $(CFLAGS) $(LDFLAGS) -o $@ -c $< $(LIBNAME): $(OBJECTS) - libtool --silent --mode=link gcc $(CFLAGS) `find . -type f | grep "[.]lo" | xargs` -o $(LIBNAME) -rpath $(LIBPATH) -version-info $(VERSION) + libtool --silent --mode=link --tag=CC $(CC) $(CFLAGS) `find . -type f | grep "[.]lo" | xargs` -o $(LIBNAME) -rpath $(LIBPATH) -version-info $(VERSION) install: $(LIBNAME) install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(LIBPATH) From b1b9a82d97dfcdc087d99ef8323bf11f4755ab1e Mon Sep 17 00:00:00 2001 From: Steffen Jaeckel Date: Sun, 8 Feb 2015 11:50:39 +0100 Subject: [PATCH 4/9] demo: split up test to 'test' and a new 'timing' build target --- .gitignore | 2 + demo/test.c | 402 ------------------------------- demo/timing.c | 625 ++++++++++++++++++++++++++++++++++++++++++++++++ makefile | 4 +- makefile.shared | 4 + 5 files changed, 633 insertions(+), 404 deletions(-) create mode 100644 demo/timing.c diff --git a/.gitignore b/.gitignore index e8dc23c..89863b0 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ stest stest.exe rsatest rsatest.exe +timing +timing.exe diff --git a/demo/test.c b/demo/test.c index 43981d5..5fb7aa8 100644 --- a/demo/test.c +++ b/demo/test.c @@ -231,408 +231,6 @@ int main(void) } printf("\n\n"); -#if 1 - -t1 = TIMFUNC(); -sleep(1); -printf("Ticks per second: %llu\n", TIMFUNC() - t1); - - /* do some timings... 
*/ - printf("Addition:\n"); - for (t = 2; t <= FP_SIZE/2; t += 2) { - fp_zero(&a); - fp_zero(&b); - fp_zero(&c); - for (ix = 0; ix < t; ix++) { - a.dp[ix] = ix; - b.dp[ix] = ix; - } - a.used = t; - b.used = t; - t2 = -1; - for (ix = 0; ix < 25000; ++ix) { - t1 = TIMFUNC(); - fp_add(&a, &b, &c); fp_add(&a, &b, &c); - fp_add(&a, &b, &c); fp_add(&a, &b, &c); - fp_add(&a, &b, &c); fp_add(&a, &b, &c); - fp_add(&a, &b, &c); fp_add(&a, &b, &c); - t2 = (TIMFUNC() - t1)>>3; - if (t1>7; - if (t1>7; - if (t1>6; - if (t1>6; - fp_copy(&b, &c); - fp_copy(&b, &d); - if (t1>1; - fp_copy(&b, &c); - fp_copy(&b, &d); - if (t1 +#include +#include + +/* RDTSC from Scott Duplichan */ +static ulong64 TIMFUNC(void) +{ +#if defined __GNUC__ + #if defined(INTEL_CC) + ulong64 a; + asm ("rdtsc":"=A"(a)); + return a; + #elif defined(__i386__) || defined(__x86_64__) + /* version from http://www.mcs.anl.gov/~kazutomo/rdtsc.html + * the old code always got a warning issued by gcc, clang did not complain... + */ + unsigned hi, lo; + __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); + return ((ulong64)lo)|( ((ulong64)hi)<<32); + #elif defined(TFM_PPC32) + unsigned long a, b; + __asm__ __volatile__ ("mftbu %1 \nmftb %0\n":"=r"(a), "=r"(b)); + return (((ulong64)b) << 32ULL) | ((ulong64)a); + #elif defined(TFM_AVR32) + FILE *in; + char buf[20]; + in = fopen("/sys/devices/system/cpu/cpu0/pccycles", "r"); + fgets(buf, 20, in); + fclose(in); + return strtoul(buf, NULL, 10); + #else /* gcc-IA64 version */ + unsigned long result; + __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory"); + while (__builtin_expect ((int) result == -1, 0)) + __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory"); + return result; + #endif + +// Microsoft and Intel Windows compilers +#elif defined _M_IX86 + __asm rdtsc +#elif defined _M_AMD64 + return __rdtsc (); +#elif defined _M_IA64 + #if defined __INTEL_COMPILER + #include + #endif + return __getReg (3116); +#else + #error need rdtsc function for 
this build +#endif +} + +static ulong64 ticks; +static const char* p_str; + +static void print_start(const char* s) +{ + p_str = s; +} + +static void print_line(ulong64 b, ulong64 t) +{ + printf("%llu;%s;%llu;%llu\n", ticks, p_str, b, t); +} + +int main(void) +{ + fp_int a,b,c,d; + ulong64 t1, t2; + fp_digit fp; + unsigned long t, ix; + + t1 = TIMFUNC(); + sleep(1); + ticks = TIMFUNC() - t1; + fprintf(stderr, "Ticks per second: %llu\n", ticks); + + printf("Ticks/sec;Algorithm;bits;time\n"); + /* do some timings... */ + print_start("Addition"); + for (t = 2; t <= FP_SIZE / 2; t += 2) { + fp_zero(&a); + fp_zero(&b); + fp_zero(&c); + for (ix = 0; ix < t; ix++) { + a.dp[ix] = ix; + b.dp[ix] = ix; + } + a.used = t; + b.used = t; + t2 = -1; + for (ix = 0; ix < 25000; ++ix) { + t1 = TIMFUNC(); + fp_add(&a, &b, &c); + fp_add(&a, &b, &c); + fp_add(&a, &b, &c); + fp_add(&a, &b, &c); + fp_add(&a, &b, &c); + fp_add(&a, &b, &c); + fp_add(&a, &b, &c); + fp_add(&a, &b, &c); + t2 = (TIMFUNC() - t1) >> 3; + if (t1 < t2) { + --ix; + t2 = t1; + } + } + print_line(t * DIGIT_BIT, t2); + } + print_start("Multiplication"); + for (t = 2; t < FP_SIZE / 2; t += 2) { + fp_zero(&a); + fp_zero(&b); + fp_zero(&c); + for (ix = 0; ix < t; ix++) { + a.dp[ix] = ix; + b.dp[ix] = ix; + } + a.used = t; + b.used = t; + t2 = -1; + for (ix = 0; ix < 100; ++ix) { + t1 = TIMFUNC(); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, 
&c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, 
&b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + fp_mul(&a, &b, &c); + t2 = (TIMFUNC() - t1) >> 7; + if (t1 < t2) { + --ix; + t2 = t1; + } + } + print_line(t * DIGIT_BIT, t2); + } + + print_start("Squaring"); + for (t = 2; t < FP_SIZE / 2; t += 2) { + fp_zero(&a); + fp_zero(&b); + for (ix = 0; ix < t; ix++) { + a.dp[ix] = ix; + } + a.used = t; + t2 = -1; + for (ix = 0; ix < 100; ++ix) { + t1 = TIMFUNC(); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); 
+ fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + fp_sqr(&a, &b); + t2 = (TIMFUNC() - t1) >> 7; + if (t1 < t2) { + --ix; + t2 = t1; + } + } + print_line(t * DIGIT_BIT, t2); + } + + print_start("Invmod"); + for (t = 2; t < FP_SIZE / 2; t += 2) { + fp_zero(&a); + for (ix = 0; ix < t; ix++) { + a.dp[ix] = ix | 1; + } + a.used = t; + fp_zero(&b); + for (ix = 0; ix < t; ix++) { + b.dp[ix] = rand(); + } + b.used = t; + fp_clamp(&b); + fp_zero(&c); + t2 = -1; + for (ix = 0; ix < 100; ++ix) { + t1 = TIMFUNC(); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + 
fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + fp_invmod(&b, &a, &c); + t2 = (TIMFUNC() - t1) >> 6; + if (t1 < t2) { + --ix; + t2 = t1; + } + } + print_line(t * DIGIT_BIT, t2); + } + + print_start("Montgomery"); + for (t = 2; t <= (FP_SIZE / 2) - 4; t += 2) { + // printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2); + fp_zero(&a); + for (ix = 0; ix < t; ix++) { + a.dp[ix] = ix | 1; + } + a.used = t; + + fp_montgomery_setup(&a, &fp); + fp_sub_d(&a, 3, &b); + fp_sqr(&b, &b); + fp_copy(&b, &c); + fp_copy(&b, &d); + + t2 = -1; + for (ix = 0; ix < 100; ++ix) { + t1 = TIMFUNC(); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + 
fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + fp_montgomery_reduce(&c, &a, fp); + fp_montgomery_reduce(&d, &a, fp); + t2 = (TIMFUNC() - t1) >> 6; + fp_copy(&b, &c); + fp_copy(&b, &d); + if (t1 < t2) { + --ix; + t2 = t1; + } + } + print_line(t * DIGIT_BIT, t2); + } + + print_start("Exptmod"); + + for (t = 512 / DIGIT_BIT; t <= (FP_SIZE / 2) - 2; t += 256 / DIGIT_BIT) { + fp_zero(&a); + fp_zero(&b); + fp_zero(&c); + for (ix 
= 0; ix < t; ix++) { + a.dp[ix] = ix + 1; + b.dp[ix] = (fp_digit) rand() * (fp_digit) rand(); + c.dp[ix] = ix; + } + a.used = t; + b.used = t; + c.used = t; + + t2 = -1; + for (ix = 0; ix < 500; ++ix) { + t1 = TIMFUNC(); + fp_exptmod(&c, &b, &a, &d); + fp_exptmod(&c, &b, &a, &d); + t2 = (TIMFUNC() - t1) >> 1; + fp_copy(&b, &c); + fp_copy(&b, &d); + if (t1 < t2) { + t2 = t1; + --ix; + } + } + print_line(t * DIGIT_BIT, t2); + } + return 0; +} diff --git a/makefile b/makefile index 44aa0cc..6fb2137 100644 --- a/makefile +++ b/makefile @@ -111,8 +111,8 @@ demo/test.o: CFLAGS+=-Wno-unused-result test: $(LIBNAME) demo/test.o $(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test -timing: $(LIBNAME) demo/test.o - $(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test +timing: $(LIBNAME) demo/timing.o + $(CC) $(CFLAGS) demo/timing.o $(LIBNAME) $(PROF) -o timing profiled: CC="$(CC)" PREFIX="${PREFIX} CFLAGS="${CFLAGS} -fprofile-generate" MAKE=${MAKE} ${MAKE} timing diff --git a/makefile.shared b/makefile.shared index 90d7f91..54e7d45 100644 --- a/makefile.shared +++ b/makefile.shared @@ -105,6 +105,10 @@ test: $(LIBNAME) demo/test.o stest: $(LIBNAME) demo/stest.o $(LT) --mode=link --tag=CC $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o stest demo/stest.o $(LIBNAME) +.PHONY: timing +timing: $(LIBNAME) demo/timing.o + $(LT) --mode=link --tag=CC $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o timing demo/timing.o $(LIBNAME) + # $Source$ # $Revision$ # $Date$ From 77be93c2013675a28993e427e4753ca40b43f08f Mon Sep 17 00:00:00 2001 From: Steffen Jaeckel Date: Sun, 15 Feb 2015 14:50:20 +0100 Subject: [PATCH 5/9] fix compile errors --- src/mul/fp_mul_comba.c | 4 +++- src/sqr/fp_sqr_comba.c | 2 +- src/sqr/fp_sqr_comba_generic.c | 7 +++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/mul/fp_mul_comba.c b/src/mul/fp_mul_comba.c index 1bec1e1..ea95443 100644 --- a/src/mul/fp_mul_comba.c +++ b/src/mul/fp_mul_comba.c @@ -349,7 +349,9 @@ void fp_mul_comba(fp_int *A, fp_int *B, 
fp_int *C) /* execute loop */ COMBA_FORWARD; for (iz = 0; iz < iy; ++iz) { - MULADD(*tmpx++, *tmpy--); + fp_digit _tmpx = *tmpx++; + fp_digit _tmpy = *tmpy--; + MULADD(_tmpx, _tmpy); } /* store term */ diff --git a/src/sqr/fp_sqr_comba.c b/src/sqr/fp_sqr_comba.c index 9a63b4f..49525be 100644 --- a/src/sqr/fp_sqr_comba.c +++ b/src/sqr/fp_sqr_comba.c @@ -109,7 +109,7 @@ asm( \ "addq %%rax,%0 \n\t" \ "adcq %%rdx,%1 \n\t" \ "adcq $0,%2 \n\t" \ - :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc"); + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "x"(i) :"%rax","%rdx","cc"); #define SQRADD2(i, j) \ asm( \ diff --git a/src/sqr/fp_sqr_comba_generic.c b/src/sqr/fp_sqr_comba_generic.c index c5d1618..168bd9d 100644 --- a/src/sqr/fp_sqr_comba_generic.c +++ b/src/sqr/fp_sqr_comba_generic.c @@ -68,12 +68,15 @@ void fp_sqr_comba(fp_int *A, fp_int *B) /* execute loop */ for (iz = 0; iz < iy; iz++) { - SQRADD2(*tmpx++, *tmpy--); + fp_digit _tmpx = *tmpx++; + fp_digit _tmpy = *tmpy--; + SQRADD2(_tmpx, _tmpy); } /* even columns have the square term in them */ if ((ix&1) == 0) { - SQRADD(A->dp[ix>>1], A->dp[ix>>1]); + fp_digit _a_dp = A->dp[ix>>1]; + SQRADD(_a_dp, A->dp[ix>>1]); } /* store it */ From cdc82aefe17e108bce67bed276387cc3e4040414 Mon Sep 17 00:00:00 2001 From: Steffen Jaeckel Date: Sun, 15 Feb 2015 14:53:09 +0100 Subject: [PATCH 6/9] add testme.sh --- testme.sh | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100755 testme.sh diff --git a/testme.sh b/testme.sh new file mode 100755 index 0000000..e97efdc --- /dev/null +++ b/testme.sh @@ -0,0 +1,35 @@ +#!/bin/bash -e +# +# Can be run with e.g. 
./testme.sh "gcc-4.8 gcc-4.9", defaults to ./testme.sh "gcc" + +_runtest() +{ + echo -n "Run test $1 $2" + trap 'echo " - build not successful, errors are:" && cat test_gcc_errors.txt' INT TERM + make clean > /dev/null + CC="${1}" make test -j9 $2 > /dev/null 2>test_gcc_errors.txt + trap - INT TERM + local outfile="test_$(echo ${1}${2} | tr -d '\"' | tr ' ' '_').txt" + trap 'echo " - tests not successful, failed at:" && tail ${outfile}' INT TERM + ./test > ${outfile} + echo " successful" + trap - INT TERM +} + +gccopt="-m32 -m64 -mx32" +if [ $# -ge 1 ] +then + gccver=$1 +else + gccver="gcc" +fi + +for gopt in ${gccopt}; +do + for gccv in ${gccver}; + do + _runtest "${gccv} ${gopt}" "-f makefile.shared" + _runtest "${gccv} ${gopt}" "" + done +done + From d02974e02f4006f04b42e7d78cba126a338b862e Mon Sep 17 00:00:00 2001 From: Steffen Jaeckel Date: Sun, 15 Feb 2015 14:57:21 +0100 Subject: [PATCH 7/9] demo/test: fix warnings --- demo/test.c | 53 ----------------------------------------------------- 1 file changed, 53 deletions(-) diff --git a/demo/test.c b/demo/test.c index 5fb7aa8..3c5e70c 100644 --- a/demo/test.c +++ b/demo/test.c @@ -25,57 +25,6 @@ int myrng(unsigned char *dst, int len, void *dat) return len; } -#ifndef TESTING -/* RDTSC from Scott Duplichan */ -static ulong64 TIMFUNC (void) - { - #if defined __GNUC__ - #if defined(INTEL_CC) - ulong64 a; - asm ("rdtsc":"=A"(a)); - return a; - #elif defined(__i386__) || defined(__x86_64__) - /* version from http://www.mcs.anl.gov/~kazutomo/rdtsc.html - * the old code always got a warning issued by gcc, clang did not complain... 
- */ - unsigned hi, lo; - __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); - return ((ulong64)lo)|( ((ulong64)hi)<<32); - #elif defined(TFM_PPC32) - unsigned long a, b; - __asm__ __volatile__ ("mftbu %1 \nmftb %0\n":"=r"(a), "=r"(b)); - return (((ulong64)b) << 32ULL) | ((ulong64)a); - #elif defined(TFM_AVR32) - FILE *in; - char buf[20]; - in = fopen("/sys/devices/system/cpu/cpu0/pccycles", "r"); - fgets(buf, 20, in); - fclose(in); - return strtoul(buf, NULL, 10); - #else /* gcc-IA64 version */ - unsigned long result; - __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory"); - while (__builtin_expect ((int) result == -1, 0)) - __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory"); - return result; - #endif - - // Microsoft and Intel Windows compilers - #elif defined _M_IX86 - __asm rdtsc - #elif defined _M_AMD64 - return __rdtsc (); - #elif defined _M_IA64 - #if defined __INTEL_COMPILER - #include - #endif - return __getReg (3116); - #else - #error need rdtsc function for this build - #endif - } -#endif - char cmd[4096], buf[4096]; int main(void) @@ -84,10 +33,8 @@ int main(void) unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n, div2_n, mul2_n, add_d_n, sub_d_n, mul_d_n, cnt, rr, ix; #ifndef TESTING - unsigned long t; fp_digit fp; int n, err; - ulong64 t1, t2; #endif srand(time(NULL)); From e4f59d4d61df03269c7e2b69b29ea9ea6f13bd88 Mon Sep 17 00:00:00 2001 From: Steffen Jaeckel Date: Sun, 15 Feb 2015 15:13:20 +0100 Subject: [PATCH 8/9] create new make target test_standalone --- demo/test.c | 17 +++++++++++++---- makefile | 6 ++++++ makefile.shared | 8 ++++++++ testme.sh | 2 +- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/demo/test.c b/demo/test.c index 3c5e70c..5ee2d1e 100644 --- a/demo/test.c +++ b/demo/test.c @@ -3,6 +3,11 @@ #include #include + +#ifndef TFM_DEMO_TEST_VS_MTEST +#define TFM_DEMO_TEST_VS_MTEST 1 +#endif + void draw(fp_int *a) { int x; @@ -30,9 +35,11 @@ int 
myrng(unsigned char *dst, int len, void *dat) int main(void) { fp_int a,b,c,d,e,f; + unsigned long ix; +#if TFM_DEMO_TEST_VS_MTEST unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n, - div2_n, mul2_n, add_d_n, sub_d_n, mul_d_n, cnt, rr, ix; -#ifndef TESTING + div2_n, mul2_n, add_d_n, sub_d_n, mul_d_n, cnt, rr; +#else fp_digit fp; int n, err; #endif @@ -42,7 +49,7 @@ int main(void) fp_zero(&b); fp_zero(&c); fp_zero(&d); fp_zero(&e); fp_zero(&f); fp_zero(&a); -#ifndef TESTING +#if TFM_DEMO_TEST_VS_MTEST == 0 draw(&a); @@ -179,7 +186,8 @@ int main(void) printf("\n\n"); return 0; -#endif + +#else fp_zero(&b); fp_zero(&c); fp_zero(&d); fp_zero(&e); fp_zero(&f); fp_zero(&a); @@ -422,6 +430,7 @@ draw(&a);draw(&b);draw(&c);draw(&d); } } +#endif } diff --git a/makefile b/makefile index 6fb2137..de90f66 100644 --- a/makefile +++ b/makefile @@ -111,6 +111,12 @@ demo/test.o: CFLAGS+=-Wno-unused-result test: $(LIBNAME) demo/test.o $(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test +test_standalone: CFLAGS+=-DTFM_DEMO_TEST_VS_MTEST=0 + +.PHONY: test_standalone +test_standalone: $(LIBNAME) demo/test.o + $(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test + timing: $(LIBNAME) demo/timing.o $(CC) $(CFLAGS) demo/timing.o $(LIBNAME) $(PROF) -o timing diff --git a/makefile.shared b/makefile.shared index 54e7d45..39d2148 100644 --- a/makefile.shared +++ b/makefile.shared @@ -98,10 +98,18 @@ install: $(LIBNAME) mtest/mtest: mtest/mtest.c cd mtest ; make mtest +demo/test.o: CFLAGS+=-Wno-unused-result + .PHONY: test test: $(LIBNAME) demo/test.o $(LT) --mode=link --tag=CC $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o test demo/test.o $(LIBNAME) +test_standalone: CFLAGS+=-DTFM_DEMO_TEST_VS_MTEST=0 + +.PHONY: test_standalone +test_standalone: $(LIBNAME) demo/test.o + $(LT) --mode=link --tag=CC $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o test demo/test.o $(LIBNAME) + stest: $(LIBNAME) demo/stest.o $(LT) --mode=link --tag=CC $(CC) $(CFLAGS) 
$(CPPFLAGS) $(LDFLAGS) -o stest demo/stest.o $(LIBNAME) diff --git a/testme.sh b/testme.sh index e97efdc..90fa34f 100755 --- a/testme.sh +++ b/testme.sh @@ -7,7 +7,7 @@ _runtest() echo -n "Run test $1 $2" trap 'echo " - build not successful, errors are:" && cat test_gcc_errors.txt' INT TERM make clean > /dev/null - CC="${1}" make test -j9 $2 > /dev/null 2>test_gcc_errors.txt + CC="${1}" make test_standalone -j9 $2 > /dev/null 2>test_gcc_errors.txt trap - INT TERM local outfile="test_$(echo ${1}${2} | tr -d '\"' | tr ' ' '_').txt" trap 'echo " - tests not successful, failed at:" && tail ${outfile}' INT TERM From cfa59b262757c02323d4ec5d56bf4eb2043d8669 Mon Sep 17 00:00:00 2001 From: Steffen Jaeckel Date: Sun, 15 Feb 2015 15:17:36 +0100 Subject: [PATCH 9/9] travis.yml: fix build target --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 95a452c..8dbc64e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ language: c compiler: - gcc -script: CC="${MYCC}" make ${SHARED} test >test_gcc_1.txt 2>test_gcc_2.txt && ./test >test_std.txt 2>test_err.txt +script: CC="${MYCC}" make ${SHARED} test_standalone >test_gcc_1.txt 2>test_gcc_2.txt && ./test >test_std.txt 2>test_err.txt env: - MYCC="gcc" SHARED="" - MYCC="gcc -m32" SHARED=""